import os import xlrd import json from collections import defaultdict def parse_excel_files(directory,only_ask): result = [] # 遍历指定目录下的所有文件和文件夹 for root, dirs, files in os.walk(directory): # 获取文件夹名称 folder_name = os.path.basename(os.path.normpath(root)) for file in files: # 仅处理扩展名为 .xlsx 或 .xls 的文件 if file.endswith('.xlsx') or file.endswith('.xls'): file_path = os.path.join(root, file) # 使用 xlrd 打开 Excel 文件 workbook = xlrd.open_workbook(file_path) # 遍历每个 sheet for sheet in workbook.sheets(): rows = sheet.nrows for row in range(rows): row_data = [] # 跳过表头 if row == 0: continue # 跳过空行 if all(sheet.cell_value(row, col) == '' for col in range(sheet.ncols)): continue # 遍历每一列 for col in range(sheet.ncols): # 获取表头 header = sheet.cell_value(0, col) # 获取单元格值 cell_value = sheet.cell_value(row, col) if cell_value: if header=='问': if only_ask is None: cell_value= '问:'+str(cell_value) else: cell_value= str(cell_value).strip() row_data.append(cell_value) if header=='答' and only_ask==None: cell_value= '答:'+str(cell_value) row_data.append(cell_value) # 将有效行数据添加到结果中 if row_data: result.append(row_data) print(len(result)) return result def write_data_to_file(input_directory,output_file,only_ask=None): # 解析 Excel 文件并写入数据到文件 excel_data = parse_excel_files(input_directory,only_ask) with open(output_file, 'w', encoding='utf-8') as f: # 遍历每行内容 for row in excel_data: # 使用json.dumps将行数据转换为JSON格式的字符串 json_row = json.dumps(row, ensure_ascii=False) # 写入文件并添加换行符 f.write(json_row + '\n') def formatTjl(directory): result = [] # 遍历指定目录下的所有文件和文件夹 for root, dirs, files in os.walk(directory): for file in files: # 处理扩展名为 .xlsx 和 .xls 的文件 if file.endswith('.xlsx') or file.endswith('.xls'): file_path = os.path.join(root, file) if file.endswith('.xls'): # 使用 xlrd 打开 Excel 文件 workbook = xlrd.open_workbook(file_path) for sheet in workbook.sheets(): print(sheet.name) for row_idx in range(sheet.nrows): row_data = { 'data': '', 'time': '', 'CLJYSSLL': '', 'SJTYJLY': '', } # 跳过表头 if row_idx < 4: continue # 跳过空行 if all(sheet.cell_value(row_idx, col_idx) == '' for col_idx in range(sheet.ncols)): continue # 遍历每一列 for col_idx in range(sheet.ncols): # 获取表头 header = sheet.cell_value(3, col_idx) cell_value = sheet.cell_value(row_idx, col_idx) if cell_value: if header == '日期': row_data['data'] = str(cell_value) elif header == '时间': row_data['time'] = str(cell_value) elif header == '除磷加药瞬时流量': row_data['CLJYSSLL'] = str(cell_value) elif header == '乙酸钠加药瞬时流量转换': row_data['SJTYJLY'] = str(cell_value) # 将有效行数据添加到结果中 result.append(row_data) # 根据 data 和 time 进行排序 def sort_key(row): return (row['data']) result.sort(key=sort_key) # print(len(result)) output_file = './excel/ceshi.json' with open(output_file, 'w', encoding='utf-8') as f: for row in result: json_row = json.dumps(row, ensure_ascii=False) f.write(json_row + '\n') if __name__ == "__main__": # 指定目录路径和输出文件路径 input_directory = '/Users/yushanghui/hongshantianping/git/dataTools/book/edit/' output_file = '/Users/yushanghui/hongshantianping/git/dataTools/book/json/edit.json' write_data_to_file(input_directory,output_file)