// useExcelParser.ts
  1. import { useState } from 'react';
  2. import * as XLSX from 'xlsx';
  3. import { message } from 'antd';
  4. import { ExcelRow, SheetConfig } from '../types';
  // A worksheet represented as an array of rows, each row being an array of cell values.
  // In this file it is the shape the result of `XLSX.utils.sheet_to_json(..., { header: 1 })` is cast to.
  type ExcelJsonData = Array<Array<string | number | null>>;
  6. export const useExcelParser = () => {
  7. const [jsonData, setJsonData] = useState<{ [key: string]: ExcelRow[] }>({});
  8. // 存储所有字段的原始数据,不受exportFields配置影响
  9. const [allFieldsData, setAllFieldsData] = useState<{ [key: string]: ExcelRow[] }>({});
  /**
   * Builds the export rows of one sheet from its raw rows.
   *
   * - Every output row gets an `_id` of the form `table-<tableIndex>-row-<index>`.
   * - When `sheetConfig.exportFields` is empty, all keys of the raw row are copied
   *   as-is (a raw `_id`, if present, therefore replaces the generated one) and no
   *   row is filtered out.
   * - Otherwise only the configured fields are copied, renamed through
   *   `sheetConfig.fieldMappings` when a mapping exists, and rows missing a value
   *   for any of `sheetConfig.requiredFields` are dropped.
   *
   * Missing cells (`undefined` / `null`) are exported as `null`.
   *
   * @param data Raw rows of the sheet.
   * @param sheetConfig Export configuration (exportFields, fieldMappings, requiredFields).
   * @param tableIndex Number embedded in the generated `_id`; defaults to 0.
   * @returns The rows to export, in their original order.
   */
  const processDataForExport = (
    data: ExcelRow[],
    sheetConfig: SheetConfig,
    tableIndex: number = 0
  ): ExcelRow[] => {
    const exportAll = sheetConfig.exportFields.length === 0;
    // With no required fields, or when everything is exported, nothing is filtered.
    const skipFilter = sheetConfig.requiredFields.length === 0 || exportAll;

    const isFilled = (value: unknown): boolean =>
      value !== null && value !== undefined && value !== '';

    const exported: ExcelRow[] = [];

    data.forEach((row, index) => {
      const processedRow: ExcelRow = {};
      processedRow._id = `table-${tableIndex}-row-${index}`;

      if (exportAll) {
        Object.keys(row).forEach(field => {
          processedRow[field] = row[field] ?? null;
        });
      } else {
        sheetConfig.exportFields.forEach(field => {
          const mappedField = sheetConfig.fieldMappings[field] || field;
          processedRow[mappedField] = row[field] ?? null;
        });
      }

      const keep =
        skipFilter ||
        sheetConfig.requiredFields.every(field => {
          const mappedField = sheetConfig.fieldMappings[field] || field;
          // A required field that is not part of exportFields never appears on the
          // processed row; validating it there would reject every row. In that case
          // fall back to the value in the raw row.
          const value = Object.prototype.hasOwnProperty.call(processedRow, mappedField)
            ? processedRow[mappedField]
            : row[field];
          return isFilled(value);
        });

      if (keep) {
        exported.push(processedRow);
      }
    });

    return exported;
  };
  /**
   * Copies every raw row unchanged (all fields kept, no filtering) and stamps
   * each copy with an `_id` of the form `table-<tableIndex>-row-<index>`.
   * The input rows are not mutated.
   *
   * @param data Raw rows of one table.
   * @param tableIndex Number embedded in the generated `_id`; defaults to 0.
   * @returns New row objects in the same order as `data`.
   */
  const processAllFieldsData = (
    data: ExcelRow[],
    tableIndex: number = 0
  ): ExcelRow[] => {
    const idPrefix = `table-${tableIndex}-row-`;
    return data.map((source, position) => ({
      ...source,
      _id: `${idPrefix}${position}`,
    }));
  };
  /**
   * Collects, per sheet, the distinct field names found across all of its rows.
   *
   * Keys starting with an underscore (such as `_id`) are treated as internal and
   * skipped. Any other key is kept, which includes `tableIndex` since it carries
   * no underscore prefix. Names are listed in the order they are first seen.
   *
   * @param data Rows grouped by sheet name.
   * @returns For every sheet name in `data`, the list of its field names.
   */
  const getAllAvailableFields = (data: { [key: string]: ExcelRow[] }): { [key: string]: string[] } => {
    const fieldsBySheet: { [key: string]: string[] } = {};

    for (const [sheetName, rows] of Object.entries(data)) {
      const publicKeys = rows
        .flatMap(row => Object.keys(row))
        .filter(key => !key.startsWith('_'));
      // A Set removes duplicates while keeping first-seen order.
      fieldsBySheet[sheetName] = Array.from(new Set<string>(publicKeys));
    }

    return fieldsBySheet;
  };
  /**
   * Parses one table out of a worksheet given as a 2-D array.
   *
   * Rows are read from `startRow` (0-based index into `json`) until a row whose
   * concatenated cells contain `endMarker`, or until the end of the sheet. Each
   * row becomes an object keyed by the non-empty entries of `headers`; cells
   * that are `undefined` are stored as `null`. Rows in which every mapped cell
   * is `undefined` are skipped.
   *
   * When an order number / product name cell is configured and holds a
   * non-empty value, that value is added to every row under the keys `订单号`
   * and `产品名称` respectively.
   *
   * @param json Worksheet rows.
   * @param headers Column names, index-aligned with the cells of each row.
   * @param startRow 0-based index of the first data row.
   * @param endMarker Text marking the end of the table. An empty marker means
   *   "no marker": the table then runs to the end of the sheet.
   * @param orderNumberRow 1-based row of the order number cell (<= 0 disables it).
   * @param orderNumberCol 1-based column of the order number cell (<= 0 disables it).
   * @param productNameRow Optional 1-based row of the product name cell.
   * @param productNameCol Optional 1-based column of the product name cell.
   * @returns The parsed rows and `endIndex`: the index of the marker row, or
   *   `json.length` when no marker row was found.
   */
  const parseSingleTable = (
    json: ExcelJsonData,
    headers: string[],
    startRow: number,
    endMarker: string,
    orderNumberRow: number,
    orderNumberCol: number,
    productNameRow?: number,
    productNameCol?: number
  ): { data: ExcelRow[]; endIndex: number } => {
    // Reads a cell addressed with 1-based coordinates; null when not configured.
    const readCell = (rowNumber?: number, colNumber?: number): string | null => {
      if (!rowNumber || !colNumber || rowNumber <= 0 || colNumber <= 0) {
        return null;
      }
      return String(json[rowNumber - 1]?.[colNumber - 1] ?? '');
    };

    const tableData: ExcelRow[] = [];
    const currentOrderNumber = readCell(orderNumberRow, orderNumberCol);
    const currentProductName = readCell(productNameRow, productNameCol);
    let endIndex = json.length;

    // A negative start row would only visit non-existent rows; begin at the top instead.
    for (let i = Math.max(0, startRow); i < json.length; i++) {
      const row = json[i] || [];

      // `''.includes('')` is always true, so an empty marker must not be tested:
      // it would end the table on its very first row and yield no data at all.
      if (endMarker && row.join('').includes(endMarker)) {
        endIndex = i;
        break;
      }

      const obj: ExcelRow = {};
      let hasData = false;
      headers.forEach((header, index) => {
        if (!header) {
          return;
        }
        const value = row[index];
        if (value !== undefined) {
          obj[header] = value;
          hasData = true;
        } else {
          obj[header] = null;
        }
      });

      if (currentOrderNumber) {
        obj['订单号'] = currentOrderNumber;
      }
      if (currentProductName) {
        obj['产品名称'] = currentProductName;
      }
      if (hasData) {
        tableData.push(obj);
      }
    }

    return { data: tableData, endIndex };
  };
  /**
   * Parses a worksheet that contains several tables stacked vertically.
   *
   * The first table's data starts at `sheetConfig.dataStartRow` (1-based). For
   * every table the header row is taken at the same distance above the data as
   * in the sheet configuration (`dataStartRow - headerRowIndex`), and the order
   * number / product name rows are located relative to that header row using
   * `multiTableOrderNumberOffset` / `multiTableProductNameOffset` (both default
   * to -1 when unset or 0). The product name is only looked up when both
   * `productNameRow` and `productNameCol` are configured.
   *
   * After a table's end marker is found, the next table's data is expected
   * `multiTableHeaderOffset + multiTableDataOffset` rows further down (each
   * defaults to 1 when unset or 0). Parsing stops when a header row is empty or
   * missing, when no end marker is found, or when the computed next position
   * would not move forward.
   *
   * @param json Worksheet rows.
   * @param sheetConfig Sheet configuration.
   * @returns One array of rows per table that produced at least one row.
   */
  const parseMultiTable = (
    json: ExcelJsonData,
    sheetConfig: SheetConfig
  ): ExcelRow[][] => {
    const tableDataSets: ExcelRow[][] = [];
    let currentIndex = sheetConfig.dataStartRow - 1;
    const headerOffset = sheetConfig.multiTableHeaderOffset || 1;
    const dataOffset = sheetConfig.multiTableDataOffset || 1;
    const orderNumberOffset = sheetConfig.multiTableOrderNumberOffset || -1;
    const productNameOffset = sheetConfig.multiTableProductNameOffset || -1;
    // Distance between a table's header row and its first data row.
    const headerToDataDistance = sheetConfig.dataStartRow - sheetConfig.headerRowIndex;

    while (currentIndex < json.length) {
      const headerIndex = currentIndex - headerToDataDistance;
      const headers = (json[headerIndex] || []) as string[];
      if (headers.length === 0) break;

      const orderNumberIndex = headerIndex + orderNumberOffset;

      let productNameIndex: number | undefined;
      if (sheetConfig.productNameRow && sheetConfig.productNameCol) {
        productNameIndex = headerIndex + productNameOffset;
      }

      const { data, endIndex } = parseSingleTable(
        json,
        headers,
        currentIndex,
        sheetConfig.endMarker,
        orderNumberIndex + 1, // parseSingleTable expects 1-based row numbers
        sheetConfig.orderNumberCol,
        productNameIndex !== undefined ? productNameIndex + 1 : undefined, // 1-based as well
        sheetConfig.productNameCol
      );

      if (data.length > 0) {
        tableDataSets.push(data);
      }

      // No end marker found: this was the last table.
      if (endIndex === json.length) break;

      // Negative offsets in the configuration could make the next start position
      // stay put or move backwards, which would loop forever; stop instead.
      const nextIndex = endIndex + headerOffset + dataOffset;
      if (!(nextIndex > currentIndex)) break;
      currentIndex = nextIndex;
    }

    return tableDataSets;
  };
  /**
   * Reads an Excel file and extracts the raw rows of every configured sheet.
   *
   * For each entry of `sheetConfigs` the matching worksheet is converted to a
   * 2-D array and parsed either as a single table or, when
   * `sheetConfig.isMultiTable` is set, as several stacked tables (rows then also
   * carry a 1-based `tableIndex`). Sheets that are missing, have no header row,
   * or yield no rows are reported to the user and skipped.
   *
   * Progress is shown through one antd message keyed `excelParsing` that is
   * updated in place and finally replaced by the success or failure message.
   * Per-sheet problems are shown as separate messages so that the next progress
   * update does not overwrite them.
   *
   * Side effects: stores the parsed rows with `setAllFieldsData` and logs to the
   * console.
   *
   * @param file The Excel file selected by the user.
   * @param sheetConfigs Configuration of the sheets to read.
   * @returns `rawData` (rows per sheet name) and `availableFieldsBySheet`
   *   (field names per sheet name).
   * @throws Rethrows any error raised while reading or parsing, after showing
   *   an error message.
   */
  const parseExcelFile = async (file: File, sheetConfigs: SheetConfig[]) => {
    try {
      // Open the progress message; later calls with the same key update it.
      message.loading({ content: `正在解析Excel文件: ${file.name}`, key: 'excelParsing', duration: 0 });
      console.log('开始处理文件:', file.name);
      const data = await file.arrayBuffer();

      message.loading({ content: '正在读取Excel数据...', key: 'excelParsing', duration: 0 });
      const workbook = XLSX.read(data);
      const rawData: { [key: string]: ExcelRow[] } = {};
      let totalTables = 0;
      let processedSheets = 0;

      for (const sheetConfig of sheetConfigs) {
        // Show which sheet is being processed.
        message.loading({
          content: `正在处理工作表 "${sheetConfig.sheetName}" (${processedSheets + 1}/${sheetConfigs.length})`,
          key: 'excelParsing',
          duration: 0
        });

        const worksheet = workbook.Sheets[sheetConfig.sheetName];
        if (!worksheet) {
          console.error('未找到工作表:', sheetConfig.sheetName);
          // Deliberately not keyed: a keyed message would be replaced right away
          // by the next progress update and never be seen.
          message.error({
            content: `未找到工作表"${sheetConfig.sheetName}"!`
          });
          processedSheets++;
          continue;
        }

        const json = XLSX.utils.sheet_to_json<string[]>(worksheet, { header: 1 }) as ExcelJsonData;
        const headers = json[sheetConfig.headerRowIndex - 1] as string[];
        if (!headers || headers.length === 0) {
          console.error(`工作表 ${sheetConfig.sheetName} 的表头数据无效`);
          // Not keyed, for the same reason as the error above.
          message.warning({
            content: `工作表 "${sheetConfig.sheetName}" 的表头数据无效,已跳过`
          });
          processedSheets++;
          continue;
        }

        if (sheetConfig.isMultiTable) {
          // Multi-table mode.
          message.loading({
            content: `正在处理 "${sheetConfig.sheetName}" 的多表格数据...`,
            key: 'excelParsing',
            duration: 0
          });
          const tableDataSets = parseMultiTable(json, sheetConfig);
          if (tableDataSets.length > 0) {
            message.loading({
              content: `"${sheetConfig.sheetName}" 中发现 ${tableDataSets.length} 个数据表,正在处理...`,
              key: 'excelParsing',
              duration: 0
            });
            // Keep every original field and tag each row with its 1-based table number.
            rawData[sheetConfig.sheetName] = tableDataSets.flatMap((tableData, tableIndex) => {
              return processAllFieldsData(tableData, tableIndex + 1).map(row => ({
                ...row,
                tableIndex: tableIndex + 1,
              }));
            });
            totalTables += tableDataSets.length;
          } else {
            message.warning({
              content: `未在 "${sheetConfig.sheetName}" 中找到有效的数据表`,
              duration: 2
            });
          }
        } else {
          // Single-table mode.
          message.loading({
            content: `正在处理 "${sheetConfig.sheetName}" 的单表格数据...`,
            key: 'excelParsing',
            duration: 0
          });
          const { data } = parseSingleTable(
            json,
            headers,
            sheetConfig.dataStartRow - 1,
            sheetConfig.endMarker,
            sheetConfig.orderNumberRow,
            sheetConfig.orderNumberCol,
            sheetConfig.productNameRow || undefined,
            sheetConfig.productNameCol || undefined
          );
          if (data.length > 0) {
            // Keep every original field.
            rawData[sheetConfig.sheetName] = processAllFieldsData(data, 1);
            totalTables += 1;
          } else {
            message.warning({
              content: `未在 "${sheetConfig.sheetName}" 中找到有效数据`,
              duration: 2
            });
          }
        }

        processedSheets++;
        if (processedSheets < sheetConfigs.length) {
          message.loading({
            content: `已处理 ${processedSheets}/${sheetConfigs.length} 个工作表...`,
            key: 'excelParsing',
            duration: 0
          });
        }
      }

      message.loading({
        content: '正在整理数据字段...',
        key: 'excelParsing',
        duration: 0
      });

      // Publish the raw rows to the hook state.
      setAllFieldsData(rawData);

      // Field names available per sheet.
      const availableFieldsBySheet = getAllAvailableFields(rawData);

      // Replace the progress message with the final result.
      message.success({
        content: `Excel文件解析成功!共处理 ${Object.keys(rawData).length} 个工作表,${totalTables} 个数据表`,
        key: 'excelParsing',
        duration: 3
      });
      console.log('所有字段原始数据:', rawData);
      console.log('按工作表分类的可用字段:', availableFieldsBySheet);

      return {
        rawData,
        availableFieldsBySheet,
      };
    } catch (error) {
      console.error('文件处理错误:', error);
      // Keyed so that it replaces the still-open progress message.
      message.error({
        content: '文件处理失败,请检查文件格式是否正确!',
        key: 'excelParsing'
      });
      throw error;
    }
  };
  /**
   * Derives the export data from the stored raw rows using the current sheet
   * configurations.
   *
   * Sheets without stored rows are left out of the result. The `jsonData` state
   * is only replaced when the serialized result differs from the serialized
   * current state, so unchanged data does not trigger a state update.
   *
   * @param sheetConfigs Current configuration of the sheets to export.
   * @returns Export rows per sheet name; an empty object when no raw data is stored.
   */
  const generateExportData = (sheetConfigs: SheetConfig[]) => {
    const hasRawData = Object.keys(allFieldsData).length > 0;
    if (!hasRawData) {
      return {};
    }

    const exportData: { [key: string]: ExcelRow[] } = {};
    for (const config of sheetConfigs) {
      const sheetRows = allFieldsData[config.sheetName];
      if (sheetRows && sheetRows.length > 0) {
        // Apply the current export configuration to this sheet's raw rows.
        exportData[config.sheetName] = processDataForExport(sheetRows, config, 1);
      }
    }

    // Compare serialized snapshots to detect an actual change.
    const previousSnapshot = JSON.stringify(jsonData);
    const nextSnapshot = JSON.stringify(exportData);
    const changed = previousSnapshot !== nextSnapshot;
    if (changed) {
      setJsonData(exportData);
      console.log('生成的导出数据:', exportData);
    }

    return exportData;
  };
  351. const getFieldsBySheet = getAllAvailableFields;
  352. return {
  353. jsonData,
  354. allFieldsData,
  355. parseExcelFile,
  356. generateExportData,
  357. getFieldsBySheet
  358. };
  359. };