|
|
@@ -228,6 +228,247 @@ export async function generateArticle(content, provider = 'volces1-5') {
|
|
228
|
228
|
}
|
|
229
|
229
|
}
|
|
230
|
230
|
|
|
|
231
|
/**
 * Compute the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one string into the other.
 *
 * Characters are compared by Unicode code point rather than by UTF-16 code
 * unit, so a character outside the Basic Multilingual Plane (for example an
 * emoji) counts as one symbol instead of two.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} The edit distance between `a` and `b`.
 */
function levenshteinDistance(a, b) {
  // Array.from walks the string iterator, which yields whole code points.
  const source = Array.from(a);
  const target = Array.from(b);

  // Turning an empty string into the other one takes one insertion per symbol.
  if (source.length === 0) return target.length;
  if (target.length === 0) return source.length;

  // Only the previous row of the dynamic-programming table is ever read, so
  // two rows are enough instead of the full (m + 1) x (n + 1) matrix.
  let previousRow = Array.from({ length: source.length + 1 }, (_, column) => column);

  for (let i = 1; i <= target.length; i++) {
    const currentRow = [i];

    for (let j = 1; j <= source.length; j++) {
      if (target[i - 1] === source[j - 1]) {
        // Matching symbols cost nothing: carry the diagonal value over.
        currentRow[j] = previousRow[j - 1];
      } else {
        currentRow[j] = 1 + Math.min(
          previousRow[j - 1], // substitution
          currentRow[j - 1], // insertion
          previousRow[j], // deletion
        );
      }
    }

    previousRow = currentRow;
  }

  return previousRow[source.length];
}
|
|
|
266
|
+
|
|
|
267
|
/** Suffixes tried when checking whether an article word is a derived form. */
const COMMON_SUFFIXES = [
  'ed', 'ing', 's', 'es', 'er', 'est', 'ful', 'ly', 'ment', 'ness', 'ity',
  'tion', 'sion', 'ation', 'able', 'ible', 'al', 'ial', 'ic', 'ical', 'ious',
  'ous', 'ive', 'less', 'y',
];

/**
 * Irregular verb forms (a partial list of common verbs), keyed by base form.
 * A Map is used instead of a plain object because the keys are looked up with
 * user-supplied words: on a plain object, a word such as "constructor" would
 * resolve to an inherited Object.prototype member.
 */
const IRREGULAR_VERBS = new Map([
  ['go', ['went', 'gone', 'goes', 'going']],
  ['be', ['am', 'is', 'are', 'was', 'were', 'been', 'being']],
  ['do', ['did', 'done', 'does', 'doing']],
  ['have', ['has', 'had', 'having']],
  ['say', ['said', 'says', 'saying']],
  ['make', ['made', 'makes', 'making']],
  ['take', ['took', 'taken', 'takes', 'taking']],
  ['come', ['came', 'comes', 'coming']],
  ['see', ['saw', 'seen', 'sees', 'seeing']],
  ['know', ['knew', 'known', 'knows', 'knowing']],
  ['get', ['got', 'gotten', 'gets', 'getting']],
  ['give', ['gave', 'given', 'gives', 'giving']],
  ['find', ['found', 'finds', 'finding']],
  ['think', ['thought', 'thinks', 'thinking']],
  ['tell', ['told', 'tells', 'telling']],
  ['become', ['became', 'becomes', 'becoming']],
  ['show', ['showed', 'shown', 'shows', 'showing']],
  ['leave', ['left', 'leaves', 'leaving']],
  ['feel', ['felt', 'feels', 'feeling']],
  ['put', ['puts', 'putting']],
  ['bring', ['brought', 'brings', 'bringing']],
  ['begin', ['began', 'begun', 'begins', 'beginning']],
  ['keep', ['kept', 'keeps', 'keeping']],
  ['hold', ['held', 'holds', 'holding']],
  ['write', ['wrote', 'written', 'writes', 'writing']],
  ['stand', ['stood', 'stands', 'standing']],
  ['hear', ['heard', 'hears', 'hearing']],
  ['let', ['lets', 'letting']],
  ['mean', ['meant', 'means', 'meaning']],
  ['set', ['sets', 'setting']],
  ['meet', ['met', 'meets', 'meeting']],
  ['run', ['ran', 'runs', 'running']],
  ['pay', ['paid', 'pays', 'paying']],
  ['sit', ['sat', 'sits', 'sitting']],
  ['speak', ['spoke', 'spoken', 'speaks', 'speaking']],
  ['lie', ['lay', 'lain', 'lies', 'lying']],
  ['lead', ['led', 'leads', 'leading']],
  ['read', ['read', 'reads', 'reading']],
  ['sleep', ['slept', 'sleeps', 'sleeping']],
  ['win', ['won', 'wins', 'winning']],
  ['understand', ['understood', 'understands', 'understanding']],
  ['draw', ['drew', 'drawn', 'draws', 'drawing']],
  ['sing', ['sang', 'sung', 'sings', 'singing']],
  ['fall', ['fell', 'fallen', 'falls', 'falling']],
  ['fly', ['flew', 'flown', 'flies', 'flying']],
  ['grow', ['grew', 'grown', 'grows', 'growing']],
  ['lose', ['lost', 'loses', 'losing']],
  ['teach', ['taught', 'teaches', 'teaching']],
  ['eat', ['ate', 'eaten', 'eats', 'eating']],
  ['drink', ['drank', 'drunk', 'drinks', 'drinking']],
]);

/** Irregular comparative and superlative adjective forms, keyed by base form. */
const IRREGULAR_ADJECTIVES = new Map([
  ['good', ['better', 'best']],
  ['bad', ['worse', 'worst']],
  ['far', ['further', 'furthest', 'farther', 'farthest']],
  ['little', ['less', 'least']],
  ['many', ['more', 'most']],
  ['much', ['more', 'most']],
]);

/** Punctuation replaced by a space before an article sentence is split into words. */
const ARTICLE_PUNCTUATION = /[.,\/#!$%\^&\*;:{}=\-_`~()?"“”«»\[\]<>|@+\\…—–]/g;

/** Quote marks wrapping a token; apostrophes inside a word ("don't") are kept. */
const EDGE_QUOTES = /^['‘’]+|['‘’]+$/g;

/** Collect the distinct lower-cased words of the article, in first-seen order. */
function collectArticleWords(sentences) {
  const words = new Set();

  for (const sentence of sentences) {
    // Skip malformed entries (null, numbers, ...) instead of crashing on them.
    if (typeof sentence !== 'string') continue;

    const tokens = sentence.toLowerCase().replace(ARTICLE_PUNCTUATION, ' ').split(/\s+/);
    for (const token of tokens) {
      const word = token.replace(EDGE_QUOTES, '');
      if (word) words.add(word);
    }
  }

  return words;
}

/** Normalise an existing FormsOfWords value into an array of entries. */
function normalizeExistingForms(existing) {
  if (Array.isArray(existing)) return existing;

  // NOTE(review): a string value is assumed to be comma separated, mirroring
  // the userWords format - confirm against the producer of this JSON.
  if (typeof existing === 'string') {
    return existing.split(',').map((entry) => entry.trim()).filter(Boolean);
  }

  return [];
}

/**
 * Decide whether articleWord looks like a misspelling of originalWord: same
 * first letter, length differing by at most one, and an edit distance of
 * exactly 1 (or 1-2 when the original word is longer than 8 characters).
 */
function isLikelyMisspelling(originalWord, articleWord) {
  if (articleWord[0] !== originalWord[0]) return false;
  if (Math.abs(articleWord.length - originalWord.length) > 1) return false;

  // The cheap checks above run first so the edit distance is only computed
  // for plausible candidates.
  const distance = levenshteinDistance(originalWord, articleWord);
  const maxDistance = originalWord.length > 8 ? 2 : 1;
  return distance >= 1 && distance <= maxDistance;
}

/** Decide whether articleWord is originalWord plus one of COMMON_SUFFIXES. */
function isSuffixedFormOf(originalWord, articleWord) {
  for (const suffix of COMMON_SUFFIXES) {
    if (!articleWord.endsWith(suffix)) continue;

    const stem = articleWord.slice(0, -suffix.length);

    // Plain concatenation: walk -> walked.
    if (stem === originalWord) return true;

    // Doubled final letter: run -> running.
    if (
      stem.length > 1 &&
      stem[stem.length - 1] === stem[stem.length - 2] &&
      stem.slice(0, -1) === originalWord
    ) {
      return true;
    }

    // Silent "e" dropped before -ing: write -> writing.
    if (suffix === 'ing' && `${stem}e` === originalWord) return true;

    if (originalWord.endsWith('y')) {
      // Final "y" turned into "i": study -> studies, happy -> happiness.
      if (stem.endsWith('i') && `${stem.slice(0, -1)}y` === originalWord) return true;

      // Kept from the previous behaviour: the un-mutated spelling
      // (study -> "studyed") is also accepted for -es / -ed.
      if ((suffix === 'es' || suffix === 'ed') && `${stem}y` === originalWord) return true;
    }
  }

  return false;
}

/**
 * Enhance `FormsOfWords` by detecting inflected forms and likely misspellings
 * of the user's words inside the article.
 *
 * For every user word the result contains the word itself, its known
 * irregular verb/adjective forms (whether or not they occur in the article),
 * and every article word that equals it, is a suffixed form of it, or looks
 * like a misspelling of it. Entries already present in `FormsOfWords` are
 * kept and duplicates are removed.
 *
 * The object is updated in place and returned. It is returned untouched when
 * `jsonObj` or `userWords` is falsy, or when `ArticleEnglish` is not an array.
 *
 * @param {Object} jsonObj - Parsed JSON object; reads `ArticleEnglish` (array
 *   of sentences) and reads/writes `FormsOfWords`.
 * @param {string|string[]} userWords - The user's words, either as a
 *   comma-separated string or as an array of words.
 * @returns {Object} The same `jsonObj`, with `FormsOfWords` updated.
 */
export function enhanceFormsOfWords(jsonObj, userWords) {
  if (!jsonObj || !userWords) return jsonObj;
  if (!Array.isArray(jsonObj.ArticleEnglish)) return jsonObj;

  const rawWords = Array.isArray(userWords) ? userWords : userWords.split(',');
  const userWordsList = rawWords
    .map((word) => String(word).trim().toLowerCase())
    // Empty entries ("a,,b", trailing commas) would otherwise end up in
    // FormsOfWords as '' and match article words that equal a bare suffix.
    .filter(Boolean);

  const articleWords = collectArticleWords(jsonObj.ArticleEnglish);
  const allForms = new Set();

  for (const originalWord of userWordsList) {
    allForms.add(originalWord);

    for (const form of IRREGULAR_VERBS.get(originalWord) ?? []) allForms.add(form);
    for (const form of IRREGULAR_ADJECTIVES.get(originalWord) ?? []) allForms.add(form);

    for (const articleWord of articleWords) {
      if (
        articleWord === originalWord ||
        isLikelyMisspelling(originalWord, articleWord) ||
        isSuffixedFormOf(originalWord, articleWord)
      ) {
        allForms.add(articleWord);
      }
    }
  }

  // Existing entries come first, followed by the newly detected forms.
  jsonObj.FormsOfWords = Array.from(
    new Set([...normalizeExistingForms(jsonObj.FormsOfWords), ...allForms]),
  );

  return jsonObj;
}
|
|
|
471
|
+
|
|
231
|
472
|
/**
|
|
232
|
473
|
* 校验和修复JSON结构
|
|
233
|
474
|
* @param {string} jsonString - JSON字符串
|
|
|
@@ -365,5 +606,6 @@ export function validateAndFixJSON(jsonString) {
|
|
365
|
606
|
|
|
366
|
607
|
// Default export bundling generateArticle, enhanceFormsOfWords and
// validateAndFixJSON into a single object.
export default { generateArticle, enhanceFormsOfWords, validateAndFixJSON };
|