ocr.js 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. import common from '../../utils/util';
  2. import main from '../../utils/main';
  const app = getApp();

  /**
   * Entries of the image-source menu. `Fun` holds the name of the page method
   * tied to each entry (presumably dispatched by the page template; confirm in
   * the WXML).
   */
  const MENU_ITEMS = [
    { Name: "相册", Fun: "chooseImage" },
    { Name: "拍照", Fun: "retake" },
  ];

  /** Typographic single quotes that OCR output may use in place of "'". */
  const CURLY_APOSTROPHES = /[\u2018\u2019]/g;

  /**
   * Returns a new placeholder rectangle used when no usable geometry exists.
   * A fresh object is created on every call so callers can never share state.
   *
   * @returns {{x: number, y: number, width: number, height: number}}
   */
  function defaultPosition() {
    return { x: 50, y: 50, width: 200, height: 30 };
  }

  /**
   * Builds the menu array with exactly one highlighted entry.
   * New objects are returned each time, so `this.data.Menu` is never mutated
   * in place before being handed to `setData`.
   *
   * @param {string} selectedFun - `Fun` value of the entry to highlight.
   * @returns {Array<{Name: string, CSS: string, Fun: string}>}
   */
  function buildMenu(selectedFun) {
    return MENU_ITEMS.map((item) => ({
      Name: item.Name,
      CSS: item.Fun === selectedFun ? "Selected" : "",
      Fun: item.Fun,
    }));
  }

  Page({
    data: {
      Menu: [],
      cameraActive: true,
    },

    /** Clears the globally shared OCR word list when the page is loaded. */
    onLoad(options) {
      app.globalData.OCRWords = [];
    },

    /** Re-enables the camera and highlights the "take photo" menu entry. */
    onShow() {
      this.setData({
        cameraActive: true,
        Menu: buildMenu("retake"),
      });
    },

    /** Switches back to camera mode and highlights the "take photo" entry. */
    retake() {
      this.setData({
        Menu: buildMenu("retake"),
        cameraActive: true,
      });
    },

    /** Takes a photo with the camera context and runs OCR on the result. */
    takePhoto() {
      const ctx = wx.createCameraContext();
      ctx.takePhoto({
        quality: 'high',
        success: (res) => {
          this.setData({
            imageUrl: res.tempImagePath,
            cameraActive: false,
          });
          this.performOCR(res.tempImagePath);
        },
        fail: (err) => {
          console.error('拍照失败:', err);
          wx.showToast({
            title: '拍照失败,请重试',
            icon: 'none',
          });
        },
      });
    },

    /**
     * Highlights the "album" entry, lets the user pick one image from the
     * album and runs OCR on it.
     */
    chooseImage() {
      this.setData({
        Menu: buildMenu("chooseImage"),
        cameraActive: false,
      });
      wx.chooseMedia({
        count: 1,
        mediaType: ['image'],
        sourceType: ['album'],
        success: (res) => {
          const tempFilePath = res.tempFiles[0].tempFilePath;
          this.setData({
            imageUrl: tempFilePath,
            cameraActive: false,
          });
          this.performOCR(tempFilePath);
        },
        fail: (err) => {
          // Closing the picker is reported through `fail` too; it is a user
          // decision, not an error worth a toast.
          if (/cancel/i.test(String(err && err.errMsg))) {
            return;
          }
          console.error('选择图片失败:', err);
          wx.showToast({
            title: '选择图片失败',
            icon: 'none',
          });
        },
      });
    },

    /**
     * Runs the OCR pipeline for one image: compress, read as base64, post to
     * the OCR endpoint, validate the response, extract English words into
     * `app.globalData.OCRWords` and redirect to the word-selection page.
     * Failures are logged and reported with a toast; nothing is thrown.
     *
     * @param {string} imagePath - Local temporary path of the image.
     * @returns {Promise<void>}
     */
    async performOCR(imagePath) {
      if (!imagePath) {
        console.error('图片路径无效');
        wx.showToast({ title: '图片路径无效', icon: 'none' });
        // Callers switch the camera off before calling; switch it back on.
        this.setData({ cameraActive: true });
        return;
      }

      wx.showLoading({ title: '识别中...', mask: true });
      let failure = null;
      try {
        // 1. Compress the image; fall back to the original file if that fails.
        const compressed = await new Promise((resolve) => {
          wx.compressImage({
            src: imagePath,
            quality: 70,
            success: resolve,
            fail: () => resolve({ tempFilePath: imagePath }),
          });
        });

        // 2. Read the (compressed) file as base64.
        const fileRes = await new Promise((resolve, reject) => {
          wx.getFileSystemManager().readFile({
            filePath: compressed.tempFilePath,
            encoding: 'base64',
            success: resolve,
            fail: reject,
          });
        });

        // 3. Post the image to the OCR endpoint. No explicit timeout is set
        //    here, so whatever default wx.request applies is in effect.
        const postData = { ImageBase64: `data:image/jpeg;base64,${fileRes.data}` };
        const url = app.globalData.serverUrl
          + common.Encrypt(`OCRImageData?UserID=${app.globalData.userInfo.UserID}`);
        const cloudRes = await new Promise((resolve, reject) => {
          wx.request({
            url,
            method: "POST",
            data: postData,
            success: resolve,
            fail: reject,
          });
        });

        // 4. Validate the response shape before touching nested fields.
        const payload = cloudRes && cloudRes.data;
        const result = payload && payload.result;
        if (!result) {
          throw new Error('无效的响应格式');
        }
        if (!Array.isArray(result.TextDetections)) {
          throw new Error(result.message || '识别服务返回空数据');
        }

        // 5. Keep only detections that actually carry text.
        const texts = result.TextDetections
          .filter((item) => item && item.DetectedText)
          .map((item) => ({
            text: item.DetectedText,
            pos: this.convertPosition(item.ItemPolygon || { Points: [] }),
          }));
        if (texts.length === 0) {
          throw new Error('未识别到有效文字');
        }

        // 6. Extract the English words and publish them for the next page.
        app.globalData.OCRWords = this.extractEnglishWords(texts);
      } catch (err) {
        failure = err || {};
        console.error('OCR处理失败:', err);
      } finally {
        wx.hideLoading();
        this.setData({ cameraActive: true });
      }

      if (failure) {
        // Shown only after hideLoading: loading and toast share one display
        // slot, so hiding the loading afterwards could dismiss the toast.
        wx.showToast({
          title: failure.message || '识别失败',
          icon: 'none',
          duration: 3000,
        });
        return;
      }

      wx.redirectTo({
        url: "./selectword",
        fail: (err) => console.error('页面跳转失败:', err),
      });
    },

    /**
     * Converts OCR geometry into an `{x, y, width, height}` rectangle.
     * Accepts either `{ Points: [{X, Y}, ...] }` (bounding box of the points)
     * or a rectangle `{X, Y, Width, Height}`.
     * NOTE(review): the response field names look like Tencent Cloud OCR,
     * where `ItemPolygon` is such a rectangle; confirm against the backend.
     *
     * @param {object} polygon - Geometry object from an OCR detection.
     * @returns {{x: number, y: number, width: number, height: number}}
     *   The rectangle, or a fixed placeholder when no geometry is usable.
     */
    convertPosition(polygon) {
      if (!polygon || typeof polygon !== 'object') {
        return defaultPosition();
      }

      const points = Array.isArray(polygon.Points) ? polygon.Points : [];
      if (points.length > 0) {
        // Missing or non-numeric coordinates count as 0.
        const xs = points.map((p) => Number(p && p.X) || 0);
        const ys = points.map((p) => Number(p && p.Y) || 0);
        const left = Math.min(...xs);
        const top = Math.min(...ys);
        return {
          x: left,
          y: top,
          width: Math.max(...xs) - left,
          height: Math.max(...ys) - top,
        };
      }

      if (Number.isFinite(polygon.Width) && Number.isFinite(polygon.Height)) {
        return {
          x: Number(polygon.X) || 0,
          y: Number(polygon.Y) || 0,
          width: polygon.Width,
          height: polygon.Height,
        };
      }

      return defaultPosition();
    },

    /**
     * Extracts the distinct English words from OCR text items, in order of
     * first appearance.
     *
     * Words are lower-cased, except the pronoun "I" and its contractions
     * ("I'm", "I'll", ...). The only single-letter words kept are "I" and the
     * article "a" (an upper-case "A" is stored as "a").
     *
     * @param {Array<{text: string}>} texts - Items produced by `performOCR`.
     * @returns {string[]} The result of `common.removeDuplicateAndTrimStrings`
     *   applied to the collected words.
     */
    extractEnglishWords(texts) {
      const words = new Set();

      (texts || []).forEach((item) => {
        const raw = item && item.text;
        if (typeof raw !== 'string') {
          return;
        }

        // Normalise typographic apostrophes first so "don’t" stays one token,
        // then split on anything that is not a letter, apostrophe or hyphen.
        const parts = raw
          .replace(CURLY_APOSTROPHES, "'")
          .split(/[^A-Za-z'-]+/)
          .filter(Boolean);

        parts.forEach((part) => {
          const word = this.cleanWord(part);
          if (!word) {
            return;
          }

          if (word.length === 1) {
            if (word === 'I') {
              words.add('I');
            } else if (word === 'a' || word === 'A') {
              words.add('a');
            }
            return;
          }

          const lower = word.toLowerCase();
          // Keep the pronoun capitalised in contractions such as "I'm".
          words.add(/^i'/.test(lower) ? `I${lower.slice(1)}` : lower);
        });
      });

      return common.removeDuplicateAndTrimStrings(Array.from(words));
    },

    /**
     * Cleans one candidate word: strips leading and trailing non-letters and
     * removes every inner character that is not a letter, apostrophe or
     * hyphen. Typographic apostrophes are converted to "'" first.
     *
     * @param {string} word - Raw token.
     * @returns {string} The cleaned word; '' for empty or non-string input.
     */
    cleanWord(word) {
      if (typeof word !== 'string' || word === '') {
        return '';
      }
      return word
        .replace(CURLY_APOSTROPHES, "'")
        .replace(/^[^A-Za-z]+|[^A-Za-z]+$/g, '')
        .replace(/[^A-Za-z'-]/g, '');
    },

    /**
     * Builds the share card from the global share settings. The user ID is
     * appended to the path only when one is available.
     *
     * @returns {{title: *, path: string, imageUrl: *}}
     */
    onShareAppMessage() {
      const { ShareTitle, SharePath, ShareImage, userInfo } = app.globalData;
      const hasUserId = Boolean(userInfo) && userInfo.UserID != null;
      return {
        title: ShareTitle,
        path: hasUserId ? `${SharePath}?UserID=${userInfo.UserID}` : SharePath,
        imageUrl: ShareImage,
      };
    },
  });