123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
- import os
- import xlrd
- import json
- from collections import defaultdict
- def parse_excel_files(directory,only_ask):
- result = []
- # 遍历指定目录下的所有文件和文件夹
- for root, dirs, files in os.walk(directory):
- # 获取文件夹名称
- folder_name = os.path.basename(os.path.normpath(root))
- for file in files:
- # 仅处理扩展名为 .xlsx 或 .xls 的文件
- if file.endswith('.xlsx') or file.endswith('.xls'):
- file_path = os.path.join(root, file)
-
- # 使用 xlrd 打开 Excel 文件
- workbook = xlrd.open_workbook(file_path)
- # 遍历每个 sheet
- for sheet in workbook.sheets():
- rows = sheet.nrows
- for row in range(rows):
- row_data = []
- # 跳过表头
- if row == 0:
- continue
- # 跳过空行
- if all(sheet.cell_value(row, col) == '' for col in range(sheet.ncols)):
- continue
- # 遍历每一列
- for col in range(sheet.ncols):
- # 获取表头
- header = sheet.cell_value(0, col)
- # 获取单元格值
- cell_value = sheet.cell_value(row, col)
- if cell_value:
- if header=='问':
- if only_ask is None:
- cell_value= '问:'+str(cell_value)
- else:
- cell_value= str(cell_value).strip()
- row_data.append(cell_value)
- if header=='答' and only_ask==None:
- cell_value= '答:'+str(cell_value)
- row_data.append(cell_value)
- # 将有效行数据添加到结果中
- if row_data:
- result.append(row_data)
- print(len(result))
- return result
- def write_data_to_file(input_directory,output_file,only_ask=None):
-
- # 解析 Excel 文件并写入数据到文件
- excel_data = parse_excel_files(input_directory,only_ask)
- with open(output_file, 'w', encoding='utf-8') as f:
- # 遍历每行内容
- for row in excel_data:
- # 使用json.dumps将行数据转换为JSON格式的字符串
- json_row = json.dumps(row, ensure_ascii=False)
- # 写入文件并添加换行符
- f.write(json_row + '\n')
- def formatTjl(directory):
- result = []
-
- # 遍历指定目录下的所有文件和文件夹
- for root, dirs, files in os.walk(directory):
- for file in files:
- # 处理扩展名为 .xlsx 和 .xls 的文件
- if file.endswith('.xlsx') or file.endswith('.xls'):
- file_path = os.path.join(root, file)
-
-
- if file.endswith('.xls'):
- # 使用 xlrd 打开 Excel 文件
- workbook = xlrd.open_workbook(file_path)
- for sheet in workbook.sheets():
- print(sheet.name)
- for row_idx in range(sheet.nrows):
- row_data = {
- 'data': '',
- 'time': '',
- 'CLJYSSLL': '',
- 'SJTYJLY': '',
- }
- # 跳过表头
- if row_idx < 4:
- continue
- # 跳过空行
- if all(sheet.cell_value(row_idx, col_idx) == '' for col_idx in range(sheet.ncols)):
- continue
- # 遍历每一列
- for col_idx in range(sheet.ncols):
- # 获取表头
- header = sheet.cell_value(3, col_idx)
-
- cell_value = sheet.cell_value(row_idx, col_idx)
- if cell_value:
- if header == '日期':
- row_data['data'] = str(cell_value)
- elif header == '时间':
- row_data['time'] = str(cell_value)
- elif header == '除磷加药瞬时流量':
- row_data['CLJYSSLL'] = str(cell_value)
- elif header == '乙酸钠加药瞬时流量转换':
- row_data['SJTYJLY'] = str(cell_value)
- # 将有效行数据添加到结果中
- result.append(row_data)
-
- # 根据 data 和 time 进行排序
- def sort_key(row):
- return (row['data'])
-
- result.sort(key=sort_key)
- # print(len(result))
- output_file = './excel/ceshi.json'
- with open(output_file, 'w', encoding='utf-8') as f:
- for row in result:
- json_row = json.dumps(row, ensure_ascii=False)
- f.write(json_row + '\n')
-
-
- if __name__ == "__main__":
- # 指定目录路径和输出文件路径
- input_directory = '/Users/yushanghui/hongshantianping/git/dataTools/book/edit/'
- output_file = '/Users/yushanghui/hongshantianping/git/dataTools/book/json/edit.json'
- write_data_to_file(input_directory,output_file)
|