jiexiExcel.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. import os
  2. import xlrd
  3. import json
  4. from collections import defaultdict
  5. def parse_excel_files(directory,only_ask):
  6. result = []
  7. # 遍历指定目录下的所有文件和文件夹
  8. for root, dirs, files in os.walk(directory):
  9. # 获取文件夹名称
  10. folder_name = os.path.basename(os.path.normpath(root))
  11. for file in files:
  12. # 仅处理扩展名为 .xlsx 或 .xls 的文件
  13. if file.endswith('.xlsx') or file.endswith('.xls'):
  14. file_path = os.path.join(root, file)
  15. # 使用 xlrd 打开 Excel 文件
  16. workbook = xlrd.open_workbook(file_path)
  17. # 遍历每个 sheet
  18. for sheet in workbook.sheets():
  19. rows = sheet.nrows
  20. for row in range(rows):
  21. row_data = []
  22. # 跳过表头
  23. if row == 0:
  24. continue
  25. # 跳过空行
  26. if all(sheet.cell_value(row, col) == '' for col in range(sheet.ncols)):
  27. continue
  28. # 遍历每一列
  29. for col in range(sheet.ncols):
  30. # 获取表头
  31. header = sheet.cell_value(0, col)
  32. # 获取单元格值
  33. cell_value = sheet.cell_value(row, col)
  34. if cell_value:
  35. if header=='问':
  36. if only_ask is None:
  37. cell_value= '问:'+str(cell_value)
  38. else:
  39. cell_value= str(cell_value).strip()
  40. row_data.append(cell_value)
  41. if header=='答' and only_ask==None:
  42. cell_value= '答:'+str(cell_value)
  43. row_data.append(cell_value)
  44. # 将有效行数据添加到结果中
  45. if row_data:
  46. result.append(row_data)
  47. print(len(result))
  48. return result
  49. def write_data_to_file(input_directory,output_file,only_ask=None):
  50. # 解析 Excel 文件并写入数据到文件
  51. excel_data = parse_excel_files(input_directory,only_ask)
  52. with open(output_file, 'w', encoding='utf-8') as f:
  53. # 遍历每行内容
  54. for row in excel_data:
  55. # 使用json.dumps将行数据转换为JSON格式的字符串
  56. json_row = json.dumps(row, ensure_ascii=False)
  57. # 写入文件并添加换行符
  58. f.write(json_row + '\n')
  59. def formatTjl(directory):
  60. result = []
  61. # 遍历指定目录下的所有文件和文件夹
  62. for root, dirs, files in os.walk(directory):
  63. for file in files:
  64. # 处理扩展名为 .xlsx 和 .xls 的文件
  65. if file.endswith('.xlsx') or file.endswith('.xls'):
  66. file_path = os.path.join(root, file)
  67. if file.endswith('.xls'):
  68. # 使用 xlrd 打开 Excel 文件
  69. workbook = xlrd.open_workbook(file_path)
  70. for sheet in workbook.sheets():
  71. print(sheet.name)
  72. for row_idx in range(sheet.nrows):
  73. row_data = {
  74. 'data': '',
  75. 'time': '',
  76. 'CLJYSSLL': '',
  77. 'SJTYJLY': '',
  78. }
  79. # 跳过表头
  80. if row_idx < 4:
  81. continue
  82. # 跳过空行
  83. if all(sheet.cell_value(row_idx, col_idx) == '' for col_idx in range(sheet.ncols)):
  84. continue
  85. # 遍历每一列
  86. for col_idx in range(sheet.ncols):
  87. # 获取表头
  88. header = sheet.cell_value(3, col_idx)
  89. cell_value = sheet.cell_value(row_idx, col_idx)
  90. if cell_value:
  91. if header == '日期':
  92. row_data['data'] = str(cell_value)
  93. elif header == '时间':
  94. row_data['time'] = str(cell_value)
  95. elif header == '除磷加药瞬时流量':
  96. row_data['CLJYSSLL'] = str(cell_value)
  97. elif header == '乙酸钠加药瞬时流量转换':
  98. row_data['SJTYJLY'] = str(cell_value)
  99. # 将有效行数据添加到结果中
  100. result.append(row_data)
  101. # 根据 data 和 time 进行排序
  102. def sort_key(row):
  103. return (row['data'])
  104. result.sort(key=sort_key)
  105. # print(len(result))
  106. output_file = './excel/ceshi.json'
  107. with open(output_file, 'w', encoding='utf-8') as f:
  108. for row in result:
  109. json_row = json.dumps(row, ensure_ascii=False)
  110. f.write(json_row + '\n')
  111. if __name__ == "__main__":
  112. # 指定目录路径和输出文件路径
  113. input_directory = '/Users/yushanghui/hongshantianping/git/dataTools/book/edit/'
  114. output_file = '/Users/yushanghui/hongshantianping/git/dataTools/book/json/edit.json'
  115. write_data_to_file(input_directory,output_file)