// ocr.js
  1. import common from '../../utils/util';
  2. import main from '../../utils/main';
  // Shared application instance; the page exchanges data with other pages via app.globalData.
  const app = getApp();

  /** Placeholder text assigned to detections for which the service returned no text. */
  const EMPTY_TEXT_PLACEHOLDER = '未识别到文字';

  /** Bounding box used when a detection carries no usable polygon. */
  const DEFAULT_TEXT_BOX = Object.freeze({ x: 50, y: 50, width: 200, height: 30 });

  /**
   * Returns a fresh copy of the fallback bounding box so callers may mutate it safely.
   * @returns {{x: number, y: number, width: number, height: number}}
   */
  function defaultTextBox() {
    return Object.assign({}, DEFAULT_TEXT_BOX);
  }

  /**
   * Builds the two-entry source menu (album / camera) with exactly one entry highlighted.
   * A new array is produced on every call so page data is never mutated in place.
   * @param {string} selectedFun - `Fun` value of the entry to highlight ('chooseImage' or 'retake').
   * @returns {Array<{Name: string, CSS: string, Fun: string}>}
   */
  function buildMenu(selectedFun) {
    const entries = [
      { Name: '相册', Fun: 'chooseImage' },
      { Name: '拍照', Fun: 'retake' },
    ];
    return entries.map((entry) => ({
      Name: entry.Name,
      CSS: entry.Fun === selectedFun ? 'Selected' : '',
      Fun: entry.Fun,
    }));
  }

  /**
   * Promise wrapper around wx.compressImage. Compression is best-effort: on failure the
   * promise still resolves, pointing at the uncompressed source file.
   * @param {string} src - Path of the image to compress.
   * @returns {Promise<{tempFilePath: string}>}
   */
  function compressImage(src) {
    return new Promise((resolve) => {
      wx.compressImage({
        src,
        quality: 70,
        success: resolve,
        fail: () => resolve({ tempFilePath: src }),
      });
    });
  }

  /**
   * Promise wrapper that reads a file as base64 through the file system manager.
   * @param {string} filePath - Path of the file to read.
   * @returns {Promise<{data: string}>} Rejects with the wx failure object.
   */
  function readFileAsBase64(filePath) {
    return new Promise((resolve, reject) => {
      wx.getFileSystemManager().readFile({
        filePath,
        encoding: 'base64',
        success: resolve,
        fail: reject,
      });
    });
  }

  /**
   * Promise wrapper around a POST wx.request.
   * @param {string} url - Target URL.
   * @param {Object} data - Request payload.
   * @returns {Promise<Object>} Resolves with the wx response; rejects with the wx failure object.
   */
  function postRequest(url, data) {
    return new Promise((resolve, reject) => {
      wx.request({
        url,
        method: 'POST',
        data,
        success: resolve,
        fail: reject,
      });
    });
  }

  /**
   * OCR page: the user takes a photo or picks one from the album, the image is sent to
   * the OCR endpoint, English words are extracted from the detected text and stored in
   * app.globalData.OCRWords, and the page redirects to ./selectword.
   */
  Page({
    data: {
      Menu: [],
      cameraActive: true,
    },

    /** Clears any words left over from a previous recognition. */
    onLoad() {
      app.globalData.OCRWords = [];
    },

    /** Resets the page to camera mode every time it becomes visible. */
    onShow() {
      this.setData({
        cameraActive: true,
        Menu: buildMenu('retake'),
      });
    },

    /** Switches to camera mode and highlights the camera menu entry. */
    retake() {
      this.setData({
        Menu: buildMenu('retake'),
        cameraActive: true,
      });
    },

    /** Takes a photo with the camera and runs OCR on it. */
    takePhoto() {
      const cameraContext = wx.createCameraContext();
      cameraContext.takePhoto({
        quality: 'high',
        success: (res) => {
          this.setData({
            imageUrl: res.tempImagePath,
            cameraActive: false,
          });
          this.performOCR(res.tempImagePath);
        },
        fail: (err) => {
          console.error('拍照失败:', err);
          wx.showToast({
            title: '拍照失败,请重试',
            icon: 'none',
          });
        },
      });
    },

    /**
     * Lets the user pick one image from the album and runs OCR on it.
     * If nothing is picked (cancel or failure) the page returns to camera mode; a plain
     * cancel is not reported as an error.
     */
    chooseImage() {
      this.setData({
        Menu: buildMenu('chooseImage'),
        cameraActive: false,
      });
      wx.chooseMedia({
        count: 1,
        mediaType: ['image'],
        sourceType: ['album'],
        success: (res) => {
          const tempFilePath = res.tempFiles[0].tempFilePath;
          this.setData({
            imageUrl: tempFilePath,
          });
          this.performOCR(tempFilePath);
        },
        fail: (err) => {
          // Without an image there is nothing to show, so bring the camera back.
          this.retake();
          const wasCancelled = /cancel/i.test((err && err.errMsg) || '');
          if (wasCancelled) {
            return;
          }
          console.error('选择图片失败:', err);
          wx.showToast({
            title: '选择图片失败',
            icon: 'none',
          });
        },
      });
    },

    /**
     * Runs the OCR pipeline: compress -> base64 -> POST to the OCR endpoint -> validate ->
     * extract English words -> store them in app.globalData.OCRWords -> redirect.
     * Never rejects: every failure is logged and reported to the user with a toast.
     * @param {string} imagePath - Temporary path of the image to recognise.
     * @returns {Promise<void>}
     */
    async performOCR(imagePath) {
      if (!imagePath) {
        console.error('图片路径无效');
        this.setData({ cameraActive: true });
        wx.showToast({ title: '图片路径无效', icon: 'none', duration: 3000 });
        return;
      }

      wx.showLoading({ title: '识别中...', mask: true });
      let failureMessage = '';
      try {
        // 1. Compress the image (falls back to the original file when compression fails).
        const compressed = await compressImage(imagePath);

        // 2. Read the file as base64.
        const fileRes = await readFileAsBase64(compressed.tempFilePath);

        // 3. Send the image to the OCR endpoint.
        // NOTE(review): common.Encrypt presumably yields the endpoint path - confirm.
        const postData = { ImageBase64: `data:image/jpeg;base64,${fileRes.data}` };
        const url = app.globalData.serverUrl + common.Encrypt('OCRImageData');
        const response = await postRequest(url, postData);

        // 4. Validate the response shape before touching nested fields.
        const result = response && response.data && response.data.result;
        if (!result) {
          throw new Error('无效的响应格式');
        }
        if (!Array.isArray(result.TextDetections)) {
          throw new Error(result.message || '识别服务返回空数据');
        }

        // 5. Normalise detections and drop the ones without any text.
        const texts = result.TextDetections
          .map((item) => ({
            text: item.DetectedText || EMPTY_TEXT_PLACEHOLDER,
            pos: this.convertPosition(item.ItemPolygon || { Points: [] }),
          }))
          .filter((item) => item.text !== EMPTY_TEXT_PLACEHOLDER);
        if (texts.length === 0) {
          throw new Error('未识别到有效文字');
        }

        // 6. Extract English words and hand them to the selection page.
        app.globalData.OCRWords = this.extractEnglishWords(texts);
        wx.redirectTo({
          url: './selectword',
        });
      } catch (err) {
        console.error('OCR处理失败:', err);
        // wx failure callbacks pass plain objects without `message`.
        failureMessage = (err && err.message) || '识别失败';
      } finally {
        wx.hideLoading();
        this.setData({
          cameraActive: true,
        });
      }

      // Loading and toast share one overlay: the toast must be shown only after
      // hideLoading, otherwise hideLoading would dismiss it immediately.
      if (failureMessage) {
        wx.showToast({
          title: failureMessage,
          icon: 'none',
          duration: 3000,
        });
      }
    },

    /**
     * Converts a detection polygon into an axis-aligned bounding box.
     * @param {{Points?: Array<{X?: number, Y?: number}>}} polygon - Polygon from the OCR result.
     * @returns {{x: number, y: number, width: number, height: number}} Bounding box, or the
     *   default box when the polygon is missing, empty or malformed.
     */
    convertPosition(polygon) {
      try {
        const points = polygon.Points || [];
        if (points.length === 0) {
          return defaultTextBox();
        }
        const xs = points.map((p) => p.X || 0);
        const ys = points.map((p) => p.Y || 0);
        const left = Math.min(...xs);
        const top = Math.min(...ys);
        return {
          x: left,
          y: top,
          width: Math.max(...xs) - left,
          height: Math.max(...ys) - top,
        };
      } catch (e) {
        // Deliberate best-effort: a malformed polygon must not abort the whole recognition.
        return defaultTextBox();
      }
    },

    /**
     * Extracts the distinct English words from the recognised text items.
     * Words are lower-cased, except the pronoun "I" and the contraction "I'm".
     * The single-letter words "I" and "a"/"A" are kept; other single letters are dropped.
     * @param {Array<{text: string}>} texts - Recognised text items.
     * @returns {string[]} Words in order of first appearance, post-processed by
     *   common.removeDuplicateAndTrimStrings.
     */
    extractEnglishWords(texts) {
      console.group('英语单词提取');
      try {
        const words = new Set();
        texts.forEach((item) => {
          // Treat typographic apostrophes like ASCII ones so contractions stay in one piece.
          const text = String(item.text).replace(/[\u2018\u2019]/g, "'");
          console.log('处理文本:', text);
          // Anything that is not a letter, apostrophe or hyphen separates words
          // (spaces, punctuation, digits, Chinese characters, ...).
          const parts = text.split(/[^A-Za-z'-]+/).filter(Boolean);
          console.log('分割后的部分:', parts);
          parts.forEach((part) => {
            const cleanWord = this.cleanWord(part);
            if (cleanWord === 'I') {
              // The pronoun keeps its capital letter.
              words.add('I');
              console.log('添加单词:', 'I');
            } else if (cleanWord === 'a' || cleanWord === 'A') {
              // The article is stored lower-cased, like every other word.
              words.add('a');
              console.log('添加单词:', 'a');
            } else if (cleanWord && cleanWord.length >= 2 && /^[A-Za-z'-]+$/.test(cleanWord)) {
              let lowerWord = cleanWord.toLowerCase();
              if (lowerWord === "i'm") {
                lowerWord = "I'm";
              }
              words.add(lowerWord);
              console.log('添加单词:', lowerWord);
            }
          });
        });
        const result = common.removeDuplicateAndTrimStrings(Array.from(words));
        console.log('提取结果:', result);
        return result;
      } finally {
        // Always close the console group, even if a helper throws.
        console.groupEnd();
      }
    },

    /**
     * Strips non-letter characters from both ends of a word and removes every character
     * that is not a letter, apostrophe or hyphen from the remainder.
     * @param {string} word - Raw word candidate.
     * @returns {string} Cleaned word, or '' for empty input.
     */
    cleanWord(word) {
      if (!word) {
        return '';
      }
      return word
        .replace(/^[^A-Za-z]+|[^A-Za-z]+$/g, '')
        .replace(/[^A-Za-z'-]/g, '');
    },

    /**
     * Builds the share card. The sharer's UserID is appended to the path only when it is
     * known, so sharing also works before user info has been loaded.
     * @returns {{title: string, path: string, imageUrl: string}}
     */
    onShareAppMessage() {
      const globalData = app.globalData;
      const userInfo = globalData.userInfo || {};
      const hasUserId = userInfo.UserID !== undefined && userInfo.UserID !== null;
      return {
        title: globalData.ShareTitle,
        path: hasUserId ? `${globalData.SharePath}?UserID=${userInfo.UserID}` : globalData.SharePath,
        imageUrl: globalData.ShareImage,
      };
    },
  });