|
|
@@ -67,162 +67,281 @@ import os from 'os';
|
|
67
|
67
|
* @property {Function} cleanWord - 清理单词中的非字母字符
|
|
68
|
68
|
* @property {Function} extractEnglishWords - 从文本中提取英语单词
|
|
69
|
69
|
*/
|
|
|
70
|
+
|
|
70
|
71
|
// 不规则动词映射
|
|
71
|
72
|
const irregularVerbs = {
|
|
72
|
|
- 'go': ['went', 'gone', 'going', 'goes'],
|
|
73
|
|
- 'be': ['am', 'is', 'are', 'was', 'were', 'been', 'being'],
|
|
74
|
|
- 'do': ['did', 'done', 'doing', 'does'],
|
|
75
|
|
- 'have': ['has', 'had', 'having'],
|
|
76
|
|
- 'say': ['said', 'saying', 'says'],
|
|
77
|
|
- 'make': ['made', 'making', 'makes'],
|
|
78
|
|
- 'get': ['got', 'gotten', 'getting', 'gets'],
|
|
79
|
|
- 'know': ['knew', 'known', 'knowing', 'knows'],
|
|
80
|
|
- 'take': ['took', 'taken', 'taking', 'takes'],
|
|
81
|
|
- 'see': ['saw', 'seen', 'seeing', 'sees'],
|
|
82
|
|
- 'come': ['came', 'coming', 'comes'],
|
|
83
|
|
- 'think': ['thought', 'thinking', 'thinks'],
|
|
84
|
|
- 'look': ['looked', 'looking', 'looks'],
|
|
85
|
|
- 'want': ['wanted', 'wanting', 'wants'],
|
|
86
|
|
- 'give': ['gave', 'given', 'giving', 'gives'],
|
|
87
|
|
- 'use': ['used', 'using', 'uses'],
|
|
88
|
|
- 'find': ['found', 'finding', 'finds'],
|
|
89
|
|
- 'tell': ['told', 'telling', 'tells'],
|
|
90
|
|
- 'ask': ['asked', 'asking', 'asks'],
|
|
91
|
|
- 'work': ['worked', 'working', 'works'],
|
|
92
|
|
- 'seem': ['seemed', 'seeming', 'seems'],
|
|
93
|
|
- 'feel': ['felt', 'feeling', 'feels'],
|
|
94
|
|
- 'try': ['tried', 'trying', 'tries'],
|
|
95
|
|
- 'leave': ['left', 'leaving', 'leaves'],
|
|
96
|
|
- 'call': ['called', 'calling', 'calls'],
|
|
97
|
|
- 'fly': ['flew', 'flown', 'flying', 'flies'],
|
|
98
|
|
- 'break': ['broke', 'broken', 'breaking', 'breaks'],
|
|
99
|
|
- 'buy': ['bought', 'buying', 'buys'],
|
|
100
|
|
- 'bring': ['brought', 'bringing', 'brings'],
|
|
101
|
|
- 'build': ['built', 'building', 'builds'],
|
|
102
|
|
- 'choose': ['chose', 'chosen', 'choosing', 'chooses'],
|
|
103
|
|
- 'catch': ['caught', 'catching', 'catches'],
|
|
104
|
|
- 'draw': ['drew', 'drawn', 'drawing', 'draws'],
|
|
105
|
|
- 'drink': ['drank', 'drunk', 'drinking', 'drinks'],
|
|
106
|
|
- 'drive': ['drove', 'driven', 'driving', 'drives'],
|
|
107
|
|
- 'eat': ['ate', 'eaten', 'eating', 'eats'],
|
|
108
|
|
- 'fall': ['fell', 'fallen', 'falling', 'falls'],
|
|
109
|
|
- 'forget': ['forgot', 'forgotten', 'forgetting', 'forgets'],
|
|
110
|
|
- 'grow': ['grew', 'grown', 'growing', 'grows'],
|
|
111
|
|
- 'hear': ['heard', 'hearing', 'hears'],
|
|
112
|
|
- 'keep': ['kept', 'keeping', 'keeps'],
|
|
113
|
|
- 'lead': ['led', 'leading', 'leads'],
|
|
114
|
|
- 'learn': ['learnt', 'learned', 'learning', 'learns'],
|
|
115
|
|
- 'lose': ['lost', 'losing', 'loses'],
|
|
116
|
|
- 'pay': ['paid', 'paying', 'pays'],
|
|
117
|
|
- 'read': ['read', 'reading', 'reads'],
|
|
118
|
|
- 'run': ['ran', 'run', 'running', 'runs'],
|
|
119
|
|
- 'rise': ['rose', 'risen', 'rising', 'rises'],
|
|
120
|
|
- 'ride': ['rode', 'ridden', 'riding', 'rides'],
|
|
121
|
|
- 'ring': ['rang', 'rung', 'ringing', 'rings'],
|
|
122
|
|
- 'set': ['set', 'setting', 'sets'],
|
|
123
|
|
- 'sit': ['sat', 'sitting', 'sits'],
|
|
124
|
|
- 'speak': ['spoke', 'spoken', 'speaking', 'speaks'],
|
|
125
|
|
- 'spend': ['spent', 'spending', 'spends'],
|
|
126
|
|
- 'stand': ['stood', 'standing', 'stands'],
|
|
127
|
|
- 'swim': ['swam', 'swum', 'swimming', 'swims'],
|
|
128
|
|
- 'teach': ['taught', 'teaching', 'teaches'],
|
|
129
|
|
- 'understand': ['understood', 'understanding', 'understands'],
|
|
130
|
|
- 'write': ['wrote', 'written', 'writing', 'writes'],
|
|
131
|
|
- 'win': ['won', 'winning', 'wins'],
|
|
132
|
|
- 'wear': ['wore', 'worn', 'wearing', 'wears'],
|
|
133
|
|
- 'throw': ['threw', 'thrown', 'throwing', 'throws'],
|
|
134
|
|
- 'sing': ['sang', 'sung', 'singing', 'sings'],
|
|
135
|
|
- 'sleep': ['slept', 'sleeping', 'sleeps'],
|
|
136
|
|
- 'sell': ['sold', 'selling', 'sells'],
|
|
137
|
|
- 'send': ['sent', 'sending', 'sends'],
|
|
138
|
|
- 'put': ['put', 'putting', 'puts'],
|
|
139
|
|
- 'meet': ['met', 'meeting', 'meets'],
|
|
140
|
|
- 'let': ['let', 'letting', 'lets'],
|
|
141
|
|
- 'hold': ['held', 'holding', 'holds'],
|
|
142
|
|
- 'cut': ['cut', 'cutting', 'cuts'],
|
|
143
|
|
- 'cost': ['cost', 'costing', 'costs'],
|
|
144
|
|
- 'become': ['became', 'become', 'becoming', 'becomes'],
|
|
145
|
|
- 'begin': ['began', 'begun', 'beginning', 'begins'],
|
|
146
|
|
- 'bend': ['bent', 'bending', 'bends'],
|
|
147
|
|
- 'bet': ['bet', 'betting', 'bets'],
|
|
148
|
|
- 'bite': ['bit', 'bitten', 'biting', 'bites'],
|
|
149
|
|
- 'blow': ['blew', 'blown', 'blowing', 'blows'],
|
|
150
|
|
- 'feed': ['fed', 'feeding', 'feeds'],
|
|
151
|
|
- 'fight': ['fought', 'fighting', 'fights'],
|
|
152
|
|
- 'freeze': ['froze', 'frozen', 'freezing', 'freezes'],
|
|
153
|
|
- 'hang': ['hung', 'hanging', 'hangs'],
|
|
154
|
|
- 'hide': ['hid', 'hidden', 'hiding', 'hides'],
|
|
155
|
|
- 'hit': ['hit', 'hitting', 'hits'],
|
|
156
|
|
- 'hurt': ['hurt', 'hurting', 'hurts'],
|
|
157
|
|
- 'lay': ['laid', 'laying', 'lays'],
|
|
158
|
|
- 'lie': ['lay', 'lain', 'lying', 'lies'],
|
|
159
|
|
- 'light': ['lit', 'lighting', 'lights'],
|
|
160
|
|
- 'mean': ['meant', 'meaning', 'means'],
|
|
161
|
|
- 'prove': ['proved', 'proven', 'proving', 'proves'],
|
|
162
|
|
- 'shake': ['shook', 'shaken', 'shaking', 'shakes'],
|
|
163
|
|
- 'shine': ['shone', 'shining', 'shines'],
|
|
164
|
|
- 'shoot': ['shot', 'shooting', 'shoots'],
|
|
165
|
|
- 'show': ['showed', 'shown', 'showing', 'shows'],
|
|
166
|
|
- 'shut': ['shut', 'shutting', 'shuts'],
|
|
167
|
|
- 'steal': ['stole', 'stolen', 'stealing', 'steals'],
|
|
168
|
|
- 'stick': ['stuck', 'sticking', 'sticks'],
|
|
169
|
|
- 'strike': ['struck', 'striking', 'strikes'],
|
|
170
|
|
- 'swear': ['swore', 'sworn', 'swearing', 'swears'],
|
|
171
|
|
- 'sweep': ['swept', 'sweeping', 'sweeps'],
|
|
172
|
|
- 'tear': ['tore', 'torn', 'tearing', 'tears'],
|
|
173
|
|
- 'wake': ['woke', 'woken', 'waking', 'wakes'],
|
|
174
|
|
- 'bear': ['bore', 'born', 'bearing', 'bears'],
|
|
175
|
|
- 'beat': ['beat', 'beaten', 'beating', 'beats'],
|
|
176
|
|
- 'burst': ['burst', 'bursting', 'bursts'],
|
|
177
|
|
- 'deal': ['dealt', 'dealing', 'deals'],
|
|
178
|
|
- 'dig': ['dug', 'digging', 'digs'],
|
|
179
|
|
- 'forbid': ['forbade', 'forbidden', 'forbidding', 'forbids'],
|
|
180
|
|
- 'forecast': ['forecast', 'forecasting', 'forecasts'],
|
|
|
73
|
+ 'arise': ['arose', 'arisen', 'arising', 'arises'],
|
|
|
74
|
+ 'awake': ['awoke', 'awoken', 'awaking', 'awakes'],
|
|
|
75
|
+ 'backslide': ['backslid', 'backslidden', 'backsliding', 'backslides'],
|
|
|
76
|
+ 'become': ['became', 'become', 'becoming', 'becomes'], // 已有,但确保完整
|
|
|
77
|
+ 'behold': ['beheld', 'beholding', 'beholds'],
|
|
|
78
|
+ 'bend': ['bent', 'bending', 'bends'],
|
|
|
79
|
+ 'bet': ['bet', 'betting', 'bets'],
|
|
|
80
|
+ 'bid': ['bid', 'bidding', 'bids'],
|
|
|
81
|
+ 'bind': ['bound', 'binding', 'binds'],
|
|
|
82
|
+ 'bleed': ['bled', 'bleeding', 'bleeds'],
|
|
|
83
|
+ 'blow': ['blew', 'blown', 'blowing', 'blows'],
|
|
|
84
|
+ 'break': ['broke', 'broken', 'breaking', 'breaks'],
|
|
|
85
|
+ 'breed': ['bred', 'breeding', 'breeds'],
|
|
|
86
|
+ 'bring': ['brought', 'bringing', 'brings'],
|
|
|
87
|
+ 'build': ['built', 'building', 'builds'],
|
|
|
88
|
+ 'burn': ['burnt', 'burned', 'burning', 'burns'],
|
|
|
89
|
+ 'burst': ['burst', 'bursting', 'bursts'],
|
|
|
90
|
+ 'buy': ['bought', 'buying', 'buys'],
|
|
|
91
|
+ 'cast': ['cast', 'casting', 'casts'],
|
|
|
92
|
+ 'catch': ['caught', 'catching', 'catches'],
|
|
|
93
|
+ 'choose': ['chose', 'chosen', 'choosing', 'chooses'],
|
|
|
94
|
+ 'cling': ['clung', 'clinging', 'clings'],
|
|
|
95
|
+ 'come': ['came', 'coming', 'comes'],
|
|
|
96
|
+ 'cost': ['cost', 'costing', 'costs'],
|
|
|
97
|
+ 'creep': ['crept', 'creeping', 'creeps'],
|
|
|
98
|
+ 'cut': ['cut', 'cutting', 'cuts'],
|
|
|
99
|
+ 'deal': ['dealt', 'dealing', 'deals'],
|
|
|
100
|
+ 'dig': ['dug', 'digging', 'digs'],
|
|
|
101
|
+ 'dive': ['dove', 'dived', 'diving', 'dives'],
|
|
|
102
|
+ 'do': ['did', 'done', 'doing', 'does'],
|
|
|
103
|
+ 'draw': ['drew', 'drawn', 'drawing', 'draws'],
|
|
|
104
|
+ 'dream': ['dreamt', 'dreamed', 'dreaming', 'dreams'],
|
|
|
105
|
+ 'drink': ['drank', 'drunk', 'drinking', 'drinks'],
|
|
|
106
|
+ 'drive': ['drove', 'driven', 'driving', 'drives'],
|
|
|
107
|
+ 'eat': ['ate', 'eaten', 'eating', 'eats'],
|
|
|
108
|
+ 'fall': ['fell', 'fallen', 'falling', 'falls'],
|
|
|
109
|
+ 'feed': ['fed', 'feeding', 'feeds'],
|
|
|
110
|
+ 'feel': ['felt', 'feeling', 'feels'],
|
|
|
111
|
+ 'fight': ['fought', 'fighting', 'fights'],
|
|
|
112
|
+ 'find': ['found', 'finding', 'finds'],
|
|
|
113
|
+ 'fit': ['fit', 'fitted', 'fitting', 'fits'],
|
|
|
114
|
+ 'flee': ['fled', 'fleeing', 'flees'],
|
|
|
115
|
+ 'fling': ['flung', 'flinging', 'flings'],
|
|
|
116
|
+ 'fly': ['flew', 'flown', 'flying', 'flies'],
|
|
|
117
|
+ 'forbid': ['forbade', 'forbidden', 'forbidding', 'forbids'],
|
|
|
118
|
+ 'forget': ['forgot', 'forgotten', 'forgetting', 'forgets'],
|
|
181
|
119
|
'forgive': ['forgave', 'forgiven', 'forgiving', 'forgives'],
|
|
182
|
|
- 'grind': ['ground', 'grinding', 'grinds'],
|
|
183
|
|
- 'kneel': ['knelt', 'kneeling', 'kneels'],
|
|
184
|
|
- 'knit': ['knit', 'knitting', 'knits'],
|
|
185
|
|
- 'lean': ['leant', 'leaning', 'leans'],
|
|
186
|
|
- 'leap': ['leapt', 'leaping', 'leaps'],
|
|
187
|
|
- 'lend': ['lent', 'lending', 'lends'],
|
|
188
|
|
- 'mistake': ['mistook', 'mistaken', 'mistaking', 'mistakes'],
|
|
189
|
|
- 'overcome': ['overcame', 'overcome', 'overcoming', 'overcomes'],
|
|
190
|
|
- 'overtake': ['overtook', 'overtaken', 'overtaking', 'overtakes'],
|
|
191
|
|
- 'rid': ['rid', 'ridding', 'rids'],
|
|
192
|
|
- 'seek': ['sought', 'seeking', 'seeks'],
|
|
193
|
|
- 'sew': ['sewed', 'sewn', 'sewing', 'sews'],
|
|
194
|
|
- 'slay': ['slew', 'slain', 'slaying', 'slays'],
|
|
195
|
|
- 'slide': ['slid', 'sliding', 'slides'],
|
|
196
|
|
- 'sling': ['slung', 'slinging', 'slings'],
|
|
197
|
|
- 'slit': ['slit', 'slitting', 'slits'],
|
|
198
|
|
- 'smell': ['smelt', 'smelling', 'smells'],
|
|
199
|
|
- 'sow': ['sowed', 'sown', 'sowing', 'sows'],
|
|
200
|
|
- 'spell': ['spelt', 'spelling', 'spells'],
|
|
201
|
|
- 'spill': ['spilt', 'spilling', 'spills'],
|
|
202
|
|
- 'spit': ['spat', 'spitting', 'spits'],
|
|
203
|
|
- 'split': ['split', 'splitting', 'splits'],
|
|
204
|
|
- 'spread': ['spread', 'spreading', 'spreads'],
|
|
205
|
|
- 'spring': ['sprang', 'sprung', 'springing', 'springs'],
|
|
206
|
|
- 'stink': ['stank', 'stunk', 'stinking', 'stinks'],
|
|
207
|
|
- 'stride': ['strode', 'stridden', 'striding', 'strides'],
|
|
208
|
|
- 'string': ['strung', 'stringing', 'strings'],
|
|
209
|
|
- 'strive': ['strove', 'striven', 'striving', 'strives'],
|
|
210
|
|
- 'sweat': ['sweat', 'sweating', 'sweats'],
|
|
211
|
|
- 'swell': ['swelled', 'swollen', 'swelling', 'swells'],
|
|
212
|
|
- 'swing': ['swung', 'swinging', 'swings'],
|
|
213
|
|
- 'thrust': ['thrust', 'thrusting', 'thrusts'],
|
|
214
|
|
- 'tread': ['trod', 'trodden', 'treading', 'treads'],
|
|
215
|
|
- 'undergo': ['underwent', 'undergone', 'undergoing', 'undergoes'],
|
|
216
|
|
- 'undertake': ['undertook', 'undertaken', 'undertaking', 'undertakes'],
|
|
217
|
|
- 'upset': ['upset', 'upsetting', 'upsets'],
|
|
218
|
|
- 'weave': ['wove', 'woven', 'weaving', 'weaves'],
|
|
219
|
|
- 'weep': ['wept', 'weeping', 'weeps'],
|
|
220
|
|
- 'wind': ['wound', 'winding', 'winds'],
|
|
221
|
|
- 'withdraw': ['withdrew', 'withdrawn', 'withdrawing', 'withdraws'],
|
|
222
|
|
- 'wring': ['wrung', 'wringing', 'wrings']
|
|
|
120
|
+ 'freeze': ['froze', 'frozen', 'freezing', 'freezes'],
|
|
|
121
|
+ 'get': ['got', 'gotten', 'getting', 'gets'],
|
|
|
122
|
+ 'give': ['gave', 'given', 'giving', 'gives'],
|
|
|
123
|
+ 'go': ['went', 'gone', 'going', 'goes'],
|
|
|
124
|
+ 'grind': ['ground', 'grinding', 'grinds'],
|
|
|
125
|
+ 'grow': ['grew', 'grown', 'growing', 'grows'],
|
|
|
126
|
+ 'hang': ['hung', 'hanging', 'hangs'],
|
|
|
127
|
+ 'have': ['has', 'had', 'having'],
|
|
|
128
|
+ 'hear': ['heard', 'hearing', 'hears'],
|
|
|
129
|
+ 'hide': ['hid', 'hidden', 'hiding', 'hides'],
|
|
|
130
|
+ 'hit': ['hit', 'hitting', 'hits'],
|
|
|
131
|
+ 'hold': ['held', 'holding', 'holds'],
|
|
|
132
|
+ 'hurt': ['hurt', 'hurting', 'hurts'],
|
|
|
133
|
+ 'keep': ['kept', 'keeping', 'keeps'],
|
|
|
134
|
+ 'kneel': ['knelt', 'kneeling', 'kneels'],
|
|
|
135
|
+ 'know': ['knew', 'known', 'knowing', 'knows'],
|
|
|
136
|
+ 'lay': ['laid', 'laying', 'lays'],
|
|
|
137
|
+ 'lead': ['led', 'leading', 'leads'],
|
|
|
138
|
+ 'lean': ['leant', 'leaning', 'leans'],
|
|
|
139
|
+ 'leap': ['leapt', 'leaping', 'leaps'],
|
|
|
140
|
+ 'learn': ['learnt', 'learned', 'learning', 'learns'],
|
|
|
141
|
+ 'leave': ['left', 'leaving', 'leaves'],
|
|
|
142
|
+ 'lend': ['lent', 'lending', 'lends'],
|
|
|
143
|
+ 'let': ['let', 'letting', 'lets'],
|
|
|
144
|
+ 'lie': ['lay', 'lain', 'lying', 'lies'],
|
|
|
145
|
+ 'light': ['lit', 'lighting', 'lights'],
|
|
|
146
|
+ 'lose': ['lost', 'losing', 'loses'],
|
|
|
147
|
+ 'make': ['made', 'making', 'makes'],
|
|
|
148
|
+ 'mean': ['meant', 'meaning', 'means'],
|
|
|
149
|
+ 'meet': ['met', 'meeting', 'meets'],
|
|
|
150
|
+ 'mistake': ['mistook', 'mistaken', 'mistaking', 'mistakes'],
|
|
|
151
|
+ 'pay': ['paid', 'paying', 'pays'],
|
|
|
152
|
+ 'put': ['put', 'putting', 'puts'],
|
|
|
153
|
+ 'quit': ['quit', 'quitting', 'quits'],
|
|
|
154
|
+ 'read': ['read', 'reading', 'reads'],
|
|
|
155
|
+ 'ride': ['rode', 'ridden', 'riding', 'rides'],
|
|
|
156
|
+ 'ring': ['rang', 'rung', 'ringing', 'rings'],
|
|
|
157
|
+ 'rise': ['rose', 'risen', 'rising', 'rises'],
|
|
|
158
|
+ 'run': ['ran', 'run', 'running', 'runs'],
|
|
|
159
|
+ 'say': ['said', 'saying', 'says'],
|
|
|
160
|
+ 'see': ['saw', 'seen', 'seeing', 'sees'],
|
|
|
161
|
+ 'seek': ['sought', 'seeking', 'seeks'],
|
|
|
162
|
+ 'sell': ['sold', 'selling', 'sells'],
|
|
|
163
|
+ 'send': ['sent', 'sending', 'sends'],
|
|
|
164
|
+ 'set': ['set', 'setting', 'sets'],
|
|
|
165
|
+ 'shake': ['shook', 'shaken', 'shaking', 'shakes'],
|
|
|
166
|
+ 'shed': ['shed', 'shedding', 'sheds'],
|
|
|
167
|
+ 'shine': ['shone', 'shining', 'shines'],
|
|
|
168
|
+ 'shoot': ['shot', 'shooting', 'shoots'],
|
|
|
169
|
+ 'show': ['showed', 'shown', 'showing', 'shows'],
|
|
|
170
|
+ 'shrink': ['shrank', 'shrunk', 'shrinking', 'shrinks'],
|
|
|
171
|
+ 'shut': ['shut', 'shutting', 'shuts'],
|
|
|
172
|
+ 'sing': ['sang', 'sung', 'singing', 'sings'],
|
|
|
173
|
+ 'sink': ['sank', 'sunk', 'sinking', 'sinks'],
|
|
|
174
|
+ 'sit': ['sat', 'sitting', 'sits'],
|
|
|
175
|
+ 'sleep': ['slept', 'sleeping', 'sleeps'],
|
|
|
176
|
+ 'slide': ['slid', 'sliding', 'slides'],
|
|
|
177
|
+ 'sling': ['slung', 'slinging', 'slings'],
|
|
|
178
|
+ 'slink': ['slunk', 'slinking', 'slinks'],
|
|
|
179
|
+ 'smell': ['smelt', 'smelling', 'smells'],
|
|
|
180
|
+ 'speak': ['spoke', 'spoken', 'speaking', 'speaks'],
|
|
|
181
|
+ 'speed': ['sped', 'speeding', 'speeds'],
|
|
|
182
|
+ 'spell': ['spelt', 'spelling', 'spells'],
|
|
|
183
|
+ 'spend': ['spent', 'spending', 'spends'],
|
|
|
184
|
+ 'spill': ['spilt', 'spilling', 'spills'],
|
|
|
185
|
+ 'spin': ['spun', 'spinning', 'spins'],
|
|
|
186
|
+ 'spit': ['spat', 'spitting', 'spits'],
|
|
|
187
|
+ 'split': ['split', 'splitting', 'splits'],
|
|
|
188
|
+ 'spread': ['spread', 'spreading', 'spreads'],
|
|
|
189
|
+ 'spring': ['sprang', 'sprung', 'springing', 'springs'],
|
|
|
190
|
+ 'stand': ['stood', 'standing', 'stands'],
|
|
|
191
|
+ 'steal': ['stole', 'stolen', 'stealing', 'steals'],
|
|
|
192
|
+ 'stick': ['stuck', 'sticking', 'sticks'],
|
|
|
193
|
+ 'sting': ['stung', 'stinging', 'stings'],
|
|
|
194
|
+ 'stink': ['stank', 'stunk', 'stinking', 'stinks'],
|
|
|
195
|
+ 'strike': ['struck', 'striking', 'strikes'],
|
|
|
196
|
+ 'strive': ['strove', 'striven', 'striving', 'strives'],
|
|
|
197
|
+ 'swear': ['swore', 'sworn', 'swearing', 'swears'],
|
|
|
198
|
+ 'sweep': ['swept', 'sweeping', 'sweeps'],
|
|
|
199
|
+ 'swim': ['swam', 'swum', 'swimming', 'swims'],
|
|
|
200
|
+ 'swing': ['swung', 'swinging', 'swings'],
|
|
|
201
|
+ 'take': ['took', 'taken', 'taking', 'takes'],
|
|
|
202
|
+ 'teach': ['taught', 'teaching', 'teaches'],
|
|
|
203
|
+ 'tear': ['tore', 'torn', 'tearing', 'tears'],
|
|
|
204
|
+ 'tell': ['told', 'telling', 'tells'],
|
|
|
205
|
+ 'think': ['thought', 'thinking', 'thinks'],
|
|
|
206
|
+ 'throw': ['threw', 'thrown', 'throwing', 'throws'],
|
|
|
207
|
+ 'thrust': ['thrust', 'thrusting', 'thrusts'],
|
|
|
208
|
+ 'tread': ['trod', 'trodden', 'treading', 'treads'],
|
|
|
209
|
+ 'undergo': ['underwent', 'undergone', 'undergoing', 'undergoes'],
|
|
|
210
|
+ 'understand': ['understood', 'understanding', 'understands'],
|
|
|
211
|
+ 'undertake': ['undertook', 'undertaken', 'undertaking', 'undertakes'],
|
|
|
212
|
+ 'upset': ['upset', 'upsetting', 'upsets'],
|
|
|
213
|
+ 'wake': ['woke', 'woken', 'waking', 'wakes'],
|
|
|
214
|
+ 'wear': ['wore', 'worn', 'wearing', 'wears'],
|
|
|
215
|
+ 'weave': ['wove', 'woven', 'weaving', 'weaves'],
|
|
|
216
|
+ 'weep': ['wept', 'weeping', 'weeps'],
|
|
|
217
|
+ 'win': ['won', 'winning', 'wins'],
|
|
|
218
|
+ 'wind': ['wound', 'winding', 'winds'],
|
|
|
219
|
+ 'withdraw': ['withdrew', 'withdrawn', 'withdrawing', 'withdraws'],
|
|
|
220
|
+ 'wring': ['wrung', 'wringing', 'wrings']
|
|
|
221
|
+};
|
|
|
222
|
+
|
|
|
223
|
+// 不规则形容词/副词映射表
|
|
|
224
|
+const irregularAdjectives = {
|
|
|
225
|
+ 'good': ['better', 'best'],
|
|
|
226
|
+ 'bad': ['worse', 'worst'],
|
|
|
227
|
+ 'many': ['more', 'most'],
|
|
|
228
|
+ 'much': ['more', 'most'],
|
|
|
229
|
+ 'little': ['less', 'least'],
|
|
|
230
|
+ 'far': ['further', 'furthest', 'farther', 'farthest'],
|
|
|
231
|
+ 'old': ['older', 'oldest', 'elder', 'eldest'],
|
|
|
232
|
+ 'late': ['later', 'latest', 'latter', 'last'],
|
|
|
233
|
+ 'well': ['better', 'best'],
|
|
|
234
|
+ 'ill': ['worse', 'worst'],
|
|
|
235
|
+ 'easy': ['easier', 'easiest'],
|
|
|
236
|
+ 'happy': ['happier', 'happiest'],
|
|
|
237
|
+ 'busy': ['busier', 'busiest'],
|
|
|
238
|
+ 'pretty': ['prettier', 'prettiest'],
|
|
|
239
|
+ 'heavy': ['heavier', 'heaviest'],
|
|
|
240
|
+ 'simple': ['simpler', 'simplest'],
|
|
|
241
|
+ 'clever': ['cleverer', 'cleverest'],
|
|
|
242
|
+ 'narrow': ['narrower', 'narrowest'],
|
|
|
243
|
+ 'quiet': ['quieter', 'quietest'],
|
|
|
244
|
+ 'large': ['larger', 'largest'],
|
|
|
245
|
+ 'small': ['smaller', 'smallest'],
|
|
|
246
|
+ 'big': ['bigger', 'biggest'],
|
|
|
247
|
+ 'hot': ['hotter', 'hottest'],
|
|
|
248
|
+ 'tall': ['taller', 'tallest'],
|
|
|
249
|
+ 'short': ['shorter', 'shortest'],
|
|
|
250
|
+ 'long': ['longer', 'longest'],
|
|
|
251
|
+ 'high': ['higher', 'highest'],
|
|
|
252
|
+ 'low': ['lower', 'lowest'],
|
|
|
253
|
+ 'deep': ['deeper', 'deepest'],
|
|
|
254
|
+ 'wide': ['wider', 'widest'],
|
|
|
255
|
+ 'near': ['nearer', 'nearest'],
|
|
|
256
|
+ 'clean': ['cleaner', 'cleanest'],
|
|
|
257
|
+ 'quick': ['quicker', 'quickest'],
|
|
|
258
|
+ 'slow': ['slower', 'slowest'],
|
|
|
259
|
+ 'strong': ['stronger', 'strongest'],
|
|
|
260
|
+ 'weak': ['weaker', 'weakest'],
|
|
|
261
|
+ 'young': ['younger', 'youngest'],
|
|
|
262
|
+ 'rich': ['richer', 'richest'],
|
|
|
263
|
+ 'poor': ['poorer', 'poorest'],
|
|
|
264
|
+ 'thick': ['thicker', 'thickest'],
|
|
|
265
|
+ 'thin': ['thinner', 'thinnest'],
|
|
|
266
|
+ 'fast': ['faster', 'fastest'],
|
|
|
267
|
+ 'hard': ['harder', 'hardest'],
|
|
|
268
|
+ 'soft': ['softer', 'softest'],
|
|
|
269
|
+ 'warm': ['warmer', 'warmest'],
|
|
|
270
|
+ 'cool': ['cooler', 'coolest'],
|
|
|
271
|
+ 'cold': ['colder', 'coldest'],
|
|
|
272
|
+ 'bright': ['brighter', 'brightest'],
|
|
|
273
|
+ 'dark': ['darker', 'darkest'],
|
|
|
274
|
+ 'light': ['lighter', 'lightest'],
|
|
|
275
|
+ 'loud': ['louder', 'loudest'],
|
|
|
276
|
+ 'sweet': ['sweeter', 'sweetest'],
|
|
|
277
|
+ 'sour': ['sourer', 'sourest'],
|
|
|
278
|
+ 'bitter': ['bitterer', 'bitterest'],
|
|
|
279
|
+ 'nice': ['nicer', 'nicest'],
|
|
|
280
|
+ 'fine': ['finer', 'finest'],
|
|
|
281
|
+ 'brave': ['braver', 'bravest'],
|
|
|
282
|
+ 'calm': ['calmer', 'calmest'],
|
|
|
283
|
+ 'cute': ['cuter', 'cutest'],
|
|
|
284
|
+ 'fair': ['fairer', 'fairest'],
|
|
|
285
|
+ 'fresh': ['fresher', 'freshest'],
|
|
|
286
|
+ 'full': ['fuller', 'fullest'],
|
|
|
287
|
+ 'glad': ['gladder', 'gladdest'],
|
|
|
288
|
+ 'great': ['greater', 'greatest'],
|
|
|
289
|
+ 'kind': ['kinder', 'kindest'],
|
|
|
290
|
+ 'new': ['newer', 'newest'],
|
|
|
291
|
+ 'odd': ['odder', 'oddest'],
|
|
|
292
|
+ 'rare': ['rarer', 'rarest'],
|
|
|
293
|
+ 'real': ['realer', 'realest'],
|
|
|
294
|
+ 'safe': ['safer', 'safest'],
|
|
|
295
|
+ 'same': ['samer', 'samest'],
|
|
|
296
|
+ 'sure': ['surer', 'surest'],
|
|
|
297
|
+ 'true': ['truer', 'truest'],
|
|
|
298
|
+ 'wise': ['wiser', 'wisest']
|
|
|
299
|
+};
|
|
|
300
|
+
|
|
|
301
|
+// 特殊不规则名词复数形式
|
|
|
302
|
+const irregularNouns = {
|
|
|
303
|
+ 'child': ['children'],
|
|
|
304
|
+ 'man': ['men'],
|
|
|
305
|
+ 'woman': ['women'],
|
|
|
306
|
+ 'foot': ['feet'],
|
|
|
307
|
+ 'tooth': ['teeth'],
|
|
|
308
|
+ 'goose': ['geese'],
|
|
|
309
|
+ 'mouse': ['mice'],
|
|
|
310
|
+ 'ox': ['oxen'],
|
|
|
311
|
+ 'person': ['people'],
|
|
|
312
|
+ 'louse': ['lice'],
|
|
|
313
|
+ 'leaf': ['leaves'],
|
|
|
314
|
+ 'life': ['lives'],
|
|
|
315
|
+ 'knife': ['knives'],
|
|
|
316
|
+ 'wife': ['wives'],
|
|
|
317
|
+ 'half': ['halves'],
|
|
|
318
|
+ 'wolf': ['wolves'],
|
|
|
319
|
+ 'shelf': ['shelves'],
|
|
|
320
|
+ 'self': ['selves'],
|
|
|
321
|
+ 'calf': ['calves'],
|
|
|
322
|
+ 'thief': ['thieves'],
|
|
|
323
|
+ 'deer': ['deer'],
|
|
|
324
|
+ 'sheep': ['sheep'],
|
|
|
325
|
+ 'fish': ['fish'],
|
|
|
326
|
+ 'series': ['series'],
|
|
|
327
|
+ 'species': ['species'],
|
|
|
328
|
+ 'aircraft': ['aircraft'],
|
|
|
329
|
+ 'information': ['information'],
|
|
|
330
|
+ 'money': ['money'],
|
|
|
331
|
+ 'rice': ['rice'],
|
|
|
332
|
+ 'equipment': ['equipment'],
|
|
|
333
|
+ 'furniture': ['furniture'],
|
|
|
334
|
+ 'datum': ['data'],
|
|
|
335
|
+ 'phenomenon': ['phenomena'],
|
|
|
336
|
+ 'criterion': ['criteria']
|
|
223
|
337
|
};
|
|
224
|
338
|
|
|
225
|
339
|
export const stringUtils = {
|
|
|
340
|
+ // 导出不规则字典
|
|
|
341
|
+ irregularVerbs,
|
|
|
342
|
+ irregularAdjectives,
|
|
|
343
|
+ irregularNouns,
|
|
|
344
|
+
|
|
226
|
345
|
//给字符串左侧补零
|
|
227
|
346
|
AddZero: (str, length) => {
|
|
228
|
347
|
while (str.length < length) {
|
|
|
@@ -881,25 +1000,165 @@ export const stringUtils = {
|
|
881
|
1000
|
return result;
|
|
882
|
1001
|
},
|
|
883
|
1002
|
|
|
|
1003
|
+
|
|
|
1004
|
+
|
|
884
|
1005
|
/**
|
|
885
|
|
- * 获取单词的原形(基本形式)
|
|
886
|
|
- * @param {string} word - 要转换的单词
|
|
887
|
|
- * @returns {string[]} - 可能的原形单词数组
|
|
|
1006
|
+ * 获取单词的所有变形形式
|
|
|
1007
|
+ * @param {string} word - 要获取变形的单词
|
|
|
1008
|
+ * @returns {string[]} - 单词的所有变形形式数组
|
|
888
|
1009
|
*/
|
|
889
|
|
- getWordBaseForm: (word) => {
|
|
|
1010
|
+ getWordAllForms: (word) => {
|
|
|
1011
|
+ if (!word || typeof word !== 'string') {
|
|
|
1012
|
+ return [];
|
|
|
1013
|
+ }
|
|
|
1014
|
+ word = word.trim();
|
|
|
1015
|
+ if (word === '') {
|
|
|
1016
|
+ return [];
|
|
|
1017
|
+ }
|
|
|
1018
|
+
|
|
|
1019
|
+ // 保留原始单词,包括大小写和标点符号
|
|
|
1020
|
+ const originalWord = word;
|
|
|
1021
|
+
|
|
|
1022
|
+ // 处理缩写词中的撇号
|
|
|
1023
|
+ const hasApostrophe = word.includes("'");
|
|
|
1024
|
+
|
|
|
1025
|
+ // 转换为小写进行处理
|
|
890
|
1026
|
const lowerWord = word.toLowerCase();
|
|
891
|
|
- const possibleBaseWords = [];
|
|
892
|
1027
|
|
|
893
|
|
- // 检查是否是不规则动词的变形
|
|
894
|
|
- for (const [base, forms] of Object.entries(irregularVerbs)) {
|
|
895
|
|
- if (forms.includes(lowerWord)) {
|
|
896
|
|
- possibleBaseWords.push(base);
|
|
897
|
|
- return possibleBaseWords; // 不规则动词直接返回原形
|
|
|
1028
|
+ // 使用Set存储所有可能的变形,避免重复
|
|
|
1029
|
+ const allForms = new Set([lowerWord]); // 初始包含原单词
|
|
|
1030
|
+
|
|
|
1031
|
+ // 如果原始单词与小写形式不同,也添加原始单词
|
|
|
1032
|
+ if (originalWord !== lowerWord) {
|
|
|
1033
|
+ allForms.add(originalWord);
|
|
|
1034
|
+ }
|
|
|
1035
|
+
|
|
|
1036
|
+ // 可以考虑添加一个标志来避免重复检查
|
|
|
1037
|
+ let foundIrregular = false;
|
|
|
1038
|
+ // 检查是否是不规则动词
|
|
|
1039
|
+ for (const [base, forms] of Object.entries(stringUtils.irregularVerbs)) {
|
|
|
1040
|
+ if (base === lowerWord || forms.includes(lowerWord)) {
|
|
|
1041
|
+ // 添加原形和所有变形
|
|
|
1042
|
+ allForms.add(base);
|
|
|
1043
|
+ forms.forEach(form => allForms.add(form));
|
|
|
1044
|
+ foundIrregular = true;
|
|
|
1045
|
+ break;
|
|
|
1046
|
+ }
|
|
|
1047
|
+ }
|
|
|
1048
|
+
|
|
|
1049
|
+ // 不规则形容词/副词检查后应该也考虑是否返回
|
|
|
1050
|
+ for (const [base, forms] of Object.entries(stringUtils.irregularAdjectives)) {
|
|
|
1051
|
+ if (base === lowerWord || forms.includes(lowerWord)) {
|
|
|
1052
|
+ // 添加原形和所有变形
|
|
|
1053
|
+ allForms.add(base);
|
|
|
1054
|
+ forms.forEach(form => allForms.add(form));
|
|
|
1055
|
+ foundIrregular = true;
|
|
|
1056
|
+ break;
|
|
|
1057
|
+ }
|
|
|
1058
|
+ }
|
|
|
1059
|
+
|
|
|
1060
|
+ // 不规则名词检查后也应该设置标志
|
|
|
1061
|
+ if (stringUtils.irregularNouns[lowerWord]) {
|
|
|
1062
|
+ stringUtils.irregularNouns[lowerWord].forEach(form => allForms.add(form));
|
|
|
1063
|
+ foundIrregular = true;
|
|
|
1064
|
+ }
|
|
|
1065
|
+
|
|
|
1066
|
+ // 不规则名词复数形式检查后也应该设置标志
|
|
|
1067
|
+ for (const [singular, plurals] of Object.entries(stringUtils.irregularNouns)) {
|
|
|
1068
|
+ if (plurals.includes(lowerWord)) {
|
|
|
1069
|
+ allForms.add(singular);
|
|
|
1070
|
+ plurals.forEach(form => allForms.add(form));
|
|
|
1071
|
+ foundIrregular = true;
|
|
|
1072
|
+ break;
|
|
898
|
1073
|
}
|
|
899
|
1074
|
}
|
|
900
|
1075
|
|
|
901
|
1076
|
// 处理规则变形
|
|
|
1077
|
+ // 特殊处理一些常见的副词和特殊单词,避免错误的词干提取和变形
|
|
|
1078
|
+ const specialWords = {
|
|
|
1079
|
+ // 情态动词
|
|
|
1080
|
+ 'can': ['can', 'could', 'cannot', "can't"],
|
|
|
1081
|
+ 'may': ['may', 'might'],
|
|
|
1082
|
+ 'shall': ['shall', 'should', "shouldn't"],
|
|
|
1083
|
+ 'will': ['will', 'would', "won't", "wouldn't"],
|
|
|
1084
|
+ 'must': ['must', 'have to', 'has to', 'had to', "mustn't"],
|
|
|
1085
|
+
|
|
|
1086
|
+ // 副词
|
|
|
1087
|
+ 'early': ['early', 'earlier', 'earliest'],
|
|
|
1088
|
+ 'only': ['only'],
|
|
|
1089
|
+ 'likely': ['likely', 'more likely', 'most likely'],
|
|
|
1090
|
+ 'friendly': ['friendly', 'friendlier', 'friendliest'],
|
|
|
1091
|
+ 'lovely': ['lovely', 'lovelier', 'loveliest'],
|
|
|
1092
|
+ 'timely': ['timely', 'timelier', 'timeliest'],
|
|
|
1093
|
+ 'weekly': ['weekly'],
|
|
|
1094
|
+ 'monthly': ['monthly'],
|
|
|
1095
|
+ 'yearly': ['yearly'],
|
|
|
1096
|
+ 'daily': ['daily'],
|
|
|
1097
|
+ 'badly': ['badly', 'worse', 'worst'], // 对应bad的副词形式
|
|
|
1098
|
+
|
|
|
1099
|
+ // 特殊动词
|
|
|
1100
|
+ 'play': ['play', 'plays', 'played', 'playing'],
|
|
|
1101
|
+ 'stay': ['stay', 'stays', 'stayed', 'staying'],
|
|
|
1102
|
+ 'pay': ['pay', 'pays', 'paid', 'paying'],
|
|
|
1103
|
+ 'lay': ['lay', 'lays', 'laid', 'laying'],
|
|
|
1104
|
+ 'say': ['say', 'says', 'said', 'saying'],
|
|
|
1105
|
+
|
|
|
1106
|
+ // 特殊名词
|
|
|
1107
|
+ 'box': ['box', 'boxes'],
|
|
|
1108
|
+ 'fox': ['fox', 'foxes'],
|
|
|
1109
|
+ 'tax': ['tax', 'taxes'],
|
|
|
1110
|
+ 'bush': ['bush', 'bushes'],
|
|
|
1111
|
+ 'dish': ['dish', 'dishes'],
|
|
|
1112
|
+ 'church': ['church', 'churches'],
|
|
|
1113
|
+ 'tomato': ['tomato', 'tomatoes'],
|
|
|
1114
|
+ 'potato': ['potato', 'potatoes'],
|
|
|
1115
|
+ 'hero': ['hero', 'heroes'],
|
|
|
1116
|
+ 'echo': ['echo', 'echoes'],
|
|
|
1117
|
+
|
|
|
1118
|
+ 'its': ['its'], // 物主代词
|
|
|
1119
|
+ 'it\'s': ['it\'s'], // it is 的缩写
|
|
|
1120
|
+ 'I\'m': ['I\'m'], // I am 的缩写
|
|
|
1121
|
+ 'don\'t': ['don\'t'], // do not 的缩写
|
|
|
1122
|
+ 'doesn\'t': ['doesn\'t'], // does not 的缩写
|
|
|
1123
|
+ 'we\'re': ['we are'],
|
|
|
1124
|
+ 'they\'re': ['they are'],
|
|
|
1125
|
+ 'you\'re': ['you are'],
|
|
|
1126
|
+ 'he\'s': ['he is', 'he has'],
|
|
|
1127
|
+ 'she\'s': ['she is', 'she has'],
|
|
|
1128
|
+ 'it\'d': ['it would', 'it had'],
|
|
|
1129
|
+ 'we\'d': ['we would', 'we had'],
|
|
|
1130
|
+ 'they\'d': ['they would', 'they had'],
|
|
|
1131
|
+ 'couldn\'t': ['could not'],
|
|
|
1132
|
+ 'should\'ve': ['should have'],
|
|
|
1133
|
+ 'would\'ve': ['would have'],
|
|
|
1134
|
+ 'could\'ve': ['could have']
|
|
|
1135
|
+ };
|
|
|
1136
|
+
|
|
|
1137
|
+ // 如果是特殊单词,直接返回预定义的变形
|
|
|
1138
|
+ if (specialWords[lowerWord]) {
|
|
|
1139
|
+ specialWords[lowerWord].forEach(form => allForms.add(form));
|
|
|
1140
|
+ return [...allForms];
|
|
|
1141
|
+ }
|
|
|
1142
|
+
|
|
|
1143
|
+ // 检查是否是特殊单词的变形
|
|
|
1144
|
+ for (const [base, forms] of Object.entries(specialWords)) {
|
|
|
1145
|
+ if (forms.includes(lowerWord)) {
|
|
|
1146
|
+ forms.forEach(form => allForms.add(form));
|
|
|
1147
|
+ return [...allForms];
|
|
|
1148
|
+ }
|
|
|
1149
|
+ }
|
|
|
1150
|
+
|
|
|
1151
|
+ // 获取单词的原形(基本形式)
|
|
|
1152
|
+ const possibleBaseWords = [];
|
|
|
1153
|
+
|
|
|
1154
|
+ // 特殊单词列表,这些单词不应被识别为任何变形
|
|
|
1155
|
+ const specialBaseWords = ['this', 'is', 'was', 'has', 'his', 'its', 'us', 'yes', 'thus', 'plus'];
|
|
|
1156
|
+ if (specialBaseWords.includes(lowerWord)) {
|
|
|
1157
|
+ possibleBaseWords.push(lowerWord);
|
|
|
1158
|
+ return [...allForms];
|
|
|
1159
|
+ }
|
|
902
|
1160
|
|
|
|
1161
|
+ // 处理规则变形
|
|
903
|
1162
|
// 处理过去式/过去分词 (-ed)
|
|
904
|
1163
|
if (lowerWord.endsWith('ed')) {
|
|
905
|
1164
|
possibleBaseWords.push(lowerWord.slice(0, -2)); // 常规情况 (walked -> walk)
|
|
|
@@ -992,7 +1251,7 @@ export const stringUtils = {
|
|
992
|
1251
|
if (lowerWord.endsWith('ily')) {
|
|
993
|
1252
|
possibleBaseWords.push(lowerWord.slice(0, -3) + 'y'); // 如 happily -> happy
|
|
994
|
1253
|
}
|
|
995
|
|
-
|
|
|
1254
|
+
|
|
996
|
1255
|
// 处理复数形式
|
|
997
|
1256
|
if (lowerWord.endsWith('s') && !lowerWord.endsWith('ss')) {
|
|
998
|
1257
|
possibleBaseWords.push(lowerWord.slice(0, -1)); // 常规情况 (books -> book)
|
|
|
@@ -1040,6 +1299,195 @@ export const stringUtils = {
|
|
1040
|
1299
|
// 去重并过滤掉过短的单词
|
|
1041
|
1300
|
const uniqueBaseWords = [...new Set(possibleBaseWords)].filter(w => w.length >= 2);
|
|
1042
|
1301
|
|
|
1043
|
|
- return uniqueBaseWords;
|
|
|
1302
|
+ // 如果没有找到任何可能的原形,或者所有可能的原形都不是有效单词,则返回单词本身
|
|
|
1303
|
+ if (uniqueBaseWords.length === 0 || !uniqueBaseWords.some(w => w === lowerWord || w.length >= 3)) {
|
|
|
1304
|
+ // 清空可能不正确的原形
|
|
|
1305
|
+ uniqueBaseWords.length = 0;
|
|
|
1306
|
+ uniqueBaseWords.push(lowerWord);
|
|
|
1307
|
+ }
|
|
|
1308
|
+
|
|
|
1309
|
+ // 添加所有可能的原形到变形集合中
|
|
|
1310
|
+ uniqueBaseWords.forEach(base => allForms.add(base));
|
|
|
1311
|
+
|
|
|
1312
|
+ // 判断词性的简单启发式规则
|
|
|
1313
|
+ const adjectiveSuffixes = ['ful', 'ous', 'ive', 'ic', 'al', 'ent', 'ant', 'able', 'ible', 'ary', 'ory', 'ish'];
|
|
|
1314
|
+ const verbSuffixes = ['ize', 'ise', 'ate', 'ify', 'en'];
|
|
|
1315
|
+ const nounSuffixes = ['tion', 'sion', 'ment', 'ness', 'ity', 'hood', 'ship', 'dom', 'ism', 'ist'];
|
|
|
1316
|
+ const adverbSuffixes = ['ly', 'ward', 'wise'];// 副词后缀
|
|
|
1317
|
+
|
|
|
1318
|
+
|
|
|
1319
|
+ // 一些常见的形容词
|
|
|
1320
|
+ const commonAdjectives = ['good', 'bad', 'big', 'small', 'high', 'low', 'long', 'short', 'old', 'new',
|
|
|
1321
|
+ 'fast', 'slow', 'hard', 'soft', 'hot', 'cold', 'warm', 'cool', 'rich', 'poor',
|
|
|
1322
|
+ 'thick', 'thin', 'wide', 'narrow', 'deep', 'shallow', 'strong', 'weak', 'young',
|
|
|
1323
|
+ 'old', 'bright', 'dark', 'light', 'heavy', 'easy', 'hard', 'clean', 'dirty',
|
|
|
1324
|
+ 'full', 'empty', 'dry', 'wet', 'sick', 'healthy', 'loud', 'quiet', 'sweet',
|
|
|
1325
|
+ 'sour', 'bitter', 'nice', 'mean', 'kind', 'cruel', 'brave', 'afraid', 'happy',
|
|
|
1326
|
+ 'sad', 'angry', 'calm', 'busy', 'free', 'cheap', 'expensive', 'safe', 'dangerous'];
|
|
|
1327
|
+
|
|
|
1328
|
+ // 一些常见的动词
|
|
|
1329
|
+ const commonVerbs = ['go', 'come', 'get', 'give', 'make', 'take', 'put', 'set', 'let', 'run', 'move',
|
|
|
1330
|
+ 'walk', 'talk', 'look', 'see', 'hear', 'feel', 'think', 'know', 'find', 'want',
|
|
|
1331
|
+ 'need', 'use', 'try', 'ask', 'work', 'call', 'help', 'play', 'stop', 'start',
|
|
|
1332
|
+ 'turn', 'show', 'tell', 'say', 'write', 'read', 'sing', 'eat', 'drink', 'sleep',
|
|
|
1333
|
+ 'sit', 'stand', 'lie', 'fall', 'rise', 'leave', 'reach', 'like', 'love', 'hate',
|
|
|
1334
|
+ 'hope', 'live', 'die', 'buy', 'sell', 'pay', 'build', 'break', 'cut', 'open', 'close'];
|
|
|
1335
|
+
|
|
|
1336
|
+ // 检查是否是形容词
|
|
|
1337
|
+ let isLikelyAdjective = adjectiveSuffixes.some(suffix => lowerWord.endsWith(suffix)) ||
|
|
|
1338
|
+ commonAdjectives.includes(lowerWord) ||
|
|
|
1339
|
+ (lowerWord.length <= 6 && !nounSuffixes.some(suffix => lowerWord.endsWith(suffix)));
|
|
|
1340
|
+
|
|
|
1341
|
+ // 检查是否是动词
|
|
|
1342
|
+ let isLikelyVerb = verbSuffixes.some(suffix => lowerWord.endsWith(suffix)) ||
|
|
|
1343
|
+ commonVerbs.includes(lowerWord) ||
|
|
|
1344
|
+ (lowerWord.length <= 5 && !adjectiveSuffixes.some(suffix => lowerWord.endsWith(suffix)) &&
|
|
|
1345
|
+ !nounSuffixes.some(suffix => lowerWord.endsWith(suffix)));
|
|
|
1346
|
+
|
|
|
1347
|
+ // 检查是否是副词
|
|
|
1348
|
+ let isLikelyAdverb = adverbSuffixes.some(suffix => lowerWord.endsWith(suffix));
|
|
|
1349
|
+
|
|
|
1350
|
+ // 动词变形 (如果可能是动词)
|
|
|
1351
|
+ if (isLikelyVerb && lowerWord.length >= 2) {
|
|
|
1352
|
+ // 第三人称单数
|
|
|
1353
|
+ if (lowerWord.endsWith('s') || lowerWord.endsWith('x') || lowerWord.endsWith('ch') || lowerWord.endsWith('sh') || lowerWord.endsWith('z')) {
|
|
|
1354
|
+ allForms.add(lowerWord + 'es');
|
|
|
1355
|
+ } else if (lowerWord.endsWith('y') && !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 2))) {
|
|
|
1356
|
+ allForms.add(lowerWord.slice(0, -1) + 'ies');
|
|
|
1357
|
+ } else {
|
|
|
1358
|
+ allForms.add(lowerWord + 's');
|
|
|
1359
|
+ }
|
|
|
1360
|
+
|
|
|
1361
|
+ // 过去式和过去分词 (-ed)
|
|
|
1362
|
+ if (lowerWord.endsWith('e')) {
|
|
|
1363
|
+ allForms.add(lowerWord + 'd');
|
|
|
1364
|
+ } else if (lowerWord.endsWith('y') && !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 2))) {
|
|
|
1365
|
+ allForms.add(lowerWord.slice(0, -1) + 'ied');
|
|
|
1366
|
+ } else if (lowerWord.length > 2 &&
|
|
|
1367
|
+ !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 1)) &&
|
|
|
1368
|
+ ['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 2)) &&
|
|
|
1369
|
+ !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 3))) {
|
|
|
1370
|
+ // 双写末尾辅音字母的情况,如 stop -> stopped
|
|
|
1371
|
+ allForms.add(lowerWord + lowerWord.charAt(lowerWord.length - 1) + 'ed');
|
|
|
1372
|
+ } else {
|
|
|
1373
|
+ allForms.add(lowerWord + 'ed');
|
|
|
1374
|
+ }
|
|
|
1375
|
+
|
|
|
1376
|
+ // 现在分词 (-ing)
|
|
|
1377
|
+ if (lowerWord.endsWith('ie')) {
|
|
|
1378
|
+ allForms.add(lowerWord.slice(0, -2) + 'ying');
|
|
|
1379
|
+ } else if (lowerWord.endsWith('e') && lowerWord.length > 2) {
|
|
|
1380
|
+ allForms.add(lowerWord.slice(0, -1) + 'ing');
|
|
|
1381
|
+ } else if (lowerWord.length > 2 &&
|
|
|
1382
|
+ !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 1)) &&
|
|
|
1383
|
+ ['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 2)) &&
|
|
|
1384
|
+ !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 3))) {
|
|
|
1385
|
+ // 双写末尾辅音字母的情况,如 run -> running
|
|
|
1386
|
+ allForms.add(lowerWord + lowerWord.charAt(lowerWord.length - 1) + 'ing');
|
|
|
1387
|
+ } else {
|
|
|
1388
|
+ allForms.add(lowerWord + 'ing');
|
|
|
1389
|
+ }
|
|
|
1390
|
+ }
|
|
|
1391
|
+
|
|
|
1392
|
+ // 形容词和副词变形 (如果可能是形容词或副词)
|
|
|
1393
|
+ if ((isLikelyAdjective || isLikelyAdverb) && lowerWord.length >= 3 &&
|
|
|
1394
|
+ !lowerWord.endsWith('ing') && !lowerWord.endsWith('ed') &&
|
|
|
1395
|
+ lowerWord.length <= 8) { // 限制长度,避免生成不必要的变形
|
|
|
1396
|
+
|
|
|
1397
|
+ // 检查是否是多音节形容词,这些通常使用 more/most 而不是 -er/-est
|
|
|
1398
|
+ const isMultisyllabic = lowerWord.length > 7 ||
|
|
|
1399
|
+ lowerWord.endsWith('ful') ||
|
|
|
1400
|
+ lowerWord.endsWith('ous') ||
|
|
|
1401
|
+ lowerWord.endsWith('ive') ||
|
|
|
1402
|
+ lowerWord.endsWith('ic') ||
|
|
|
1403
|
+ lowerWord.endsWith('al') ||
|
|
|
1404
|
+ lowerWord.endsWith('ent') ||
|
|
|
1405
|
+ lowerWord.endsWith('ant') ||
|
|
|
1406
|
+ lowerWord.endsWith('able') ||
|
|
|
1407
|
+ lowerWord.endsWith('ible');
|
|
|
1408
|
+
|
|
|
1409
|
+ // 只为短形容词生成比较级和最高级
|
|
|
1410
|
+ if (!isMultisyllabic) {
|
|
|
1411
|
+ // 比较级 (-er)
|
|
|
1412
|
+ if (lowerWord.endsWith('e')) {
|
|
|
1413
|
+ allForms.add(lowerWord + 'r');
|
|
|
1414
|
+ } else if (lowerWord.endsWith('y') && !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 2))) {
|
|
|
1415
|
+ allForms.add(lowerWord.slice(0, -1) + 'ier');
|
|
|
1416
|
+ } else if (lowerWord.length > 2 &&
|
|
|
1417
|
+ !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 1)) &&
|
|
|
1418
|
+ ['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 2)) &&
|
|
|
1419
|
+ !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 3))) {
|
|
|
1420
|
+ // 双写末尾辅音字母的情况,如 big -> bigger
|
|
|
1421
|
+ allForms.add(lowerWord + lowerWord.charAt(lowerWord.length - 1) + 'er');
|
|
|
1422
|
+ } else {
|
|
|
1423
|
+ allForms.add(lowerWord + 'er');
|
|
|
1424
|
+ }
|
|
|
1425
|
+
|
|
|
1426
|
+ // 最高级 (-est)
|
|
|
1427
|
+ if (lowerWord.endsWith('e')) {
|
|
|
1428
|
+ allForms.add(lowerWord + 'st');
|
|
|
1429
|
+ } else if (lowerWord.endsWith('y') && !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 2))) {
|
|
|
1430
|
+ allForms.add(lowerWord.slice(0, -1) + 'iest');
|
|
|
1431
|
+ } else if (lowerWord.length > 2 &&
|
|
|
1432
|
+ !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 1)) &&
|
|
|
1433
|
+ ['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 2)) &&
|
|
|
1434
|
+ !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 3))) {
|
|
|
1435
|
+ // 双写末尾辅音字母的情况,如 big -> biggest
|
|
|
1436
|
+ allForms.add(lowerWord + lowerWord.charAt(lowerWord.length - 1) + 'est');
|
|
|
1437
|
+ } else {
|
|
|
1438
|
+ allForms.add(lowerWord + 'est');
|
|
|
1439
|
+ }
|
|
|
1440
|
+ }
|
|
|
1441
|
+
|
|
|
1442
|
+ // 副词变形 (-ly),只为真正的形容词生成副词形式
|
|
|
1443
|
+ if (!lowerWord.endsWith('ly')) {
|
|
|
1444
|
+ if (lowerWord.endsWith('y') && !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 2))) {
|
|
|
1445
|
+ allForms.add(lowerWord.slice(0, -1) + 'ily');
|
|
|
1446
|
+ } else if (lowerWord.endsWith('le')) {
|
|
|
1447
|
+ allForms.add(lowerWord.slice(0, -1) + 'y');
|
|
|
1448
|
+ } else {
|
|
|
1449
|
+ allForms.add(lowerWord + 'ly');
|
|
|
1450
|
+ }
|
|
|
1451
|
+ }
|
|
|
1452
|
+ }
|
|
|
1453
|
+
|
|
|
1454
|
+ // 名词复数形式 (对大多数单词都适用)
|
|
|
1455
|
+ if (lowerWord.length >= 2 && !lowerWord.endsWith('ing') && !lowerWord.endsWith('ed')) {
|
|
|
1456
|
+ if (lowerWord.endsWith('s') || lowerWord.endsWith('x') || lowerWord.endsWith('ch') || lowerWord.endsWith('sh') || lowerWord.endsWith('z')) {
|
|
|
1457
|
+ allForms.add(lowerWord + 'es');
|
|
|
1458
|
+ } else if (lowerWord.endsWith('y') && !['a', 'e', 'i', 'o', 'u'].includes(lowerWord.charAt(lowerWord.length - 2))) {
|
|
|
1459
|
+ allForms.add(lowerWord.slice(0, -1) + 'ies');
|
|
|
1460
|
+ } else if (lowerWord.endsWith('f')) {
|
|
|
1461
|
+ allForms.add(lowerWord.slice(0, -1) + 'ves');
|
|
|
1462
|
+ } else if (lowerWord.endsWith('fe')) {
|
|
|
1463
|
+ allForms.add(lowerWord.slice(0, -2) + 'ves');
|
|
|
1464
|
+ } else {
|
|
|
1465
|
+ allForms.add(lowerWord + 's');
|
|
|
1466
|
+ }
|
|
|
1467
|
+ }
|
|
|
1468
|
+
|
|
|
1469
|
+ return [...allForms];
|
|
1044
|
1470
|
},
|
|
|
1471
|
+
|
|
|
1472
|
+ /**
|
|
|
1473
|
+ * 检查单词是否符合特定的变形规则
|
|
|
1474
|
+ * @param {string} word - 要检查的单词
|
|
|
1475
|
+ * @param {string} base - 基本形式
|
|
|
1476
|
+ * @returns {boolean} - 是否符合变形规则
|
|
|
1477
|
+ */
|
|
|
1478
|
+ checkSpecialWordForms(word, base) {
|
|
|
1479
|
+ // 使用 getWordAllForms 获取基本形式的所有可能变形
|
|
|
1480
|
+ const baseForms = this.getWordAllForms(base);
|
|
|
1481
|
+ if (baseForms.includes(word)) {
|
|
|
1482
|
+ return true;
|
|
|
1483
|
+ }
|
|
|
1484
|
+
|
|
|
1485
|
+ // 反向检查:如果 word 是基本形式,base 是变形
|
|
|
1486
|
+ const wordForms = this.getWordAllForms(word);
|
|
|
1487
|
+ if (wordForms.includes(base)) {
|
|
|
1488
|
+ return true;
|
|
|
1489
|
+ }
|
|
|
1490
|
+
|
|
|
1491
|
+ return false;
|
|
|
1492
|
+ }
|
|
1045
|
1493
|
}
|