jiexiExcel.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. import os
  2. import xlrd
  3. import json
  4. from collections import defaultdict
  5. def parse_excel_files(directory,only_ask):
  6. result = []
  7. # 遍历指定目录下的所有文件和文件夹
  8. for root, dirs, files in os.walk(directory):
  9. for file in files:
  10. # 仅处理扩展名为 .xlsx 或 .xls 的文件
  11. if file.endswith('.xlsx') or file.endswith('.xls'):
  12. file_path = os.path.join(root, file)
  13. # 使用 xlrd 打开 Excel 文件
  14. workbook = xlrd.open_workbook(file_path)
  15. # 遍历每个 sheet
  16. for sheet in workbook.sheets():
  17. rows = sheet.nrows
  18. for row in range(rows):
  19. row_data = []
  20. # 跳过表头
  21. if row == 0:
  22. continue
  23. # 跳过空行
  24. if all(sheet.cell_value(row, col) == '' for col in range(sheet.ncols)):
  25. continue
  26. # 遍历每一列
  27. for col in range(sheet.ncols):
  28. # 获取表头
  29. header = sheet.cell_value(0, col)
  30. # 获取单元格值
  31. cell_value = sheet.cell_value(row, col)
  32. if cell_value:
  33. if header=='问':
  34. if only_ask is None:
  35. cell_value= '问:'+str(cell_value)
  36. else:
  37. cell_value= str(cell_value).strip()
  38. row_data.append(cell_value)
  39. if header=='答' and only_ask==None:
  40. cell_value= '答:'+str(cell_value)
  41. row_data.append(cell_value)
  42. # 将有效行数据添加到结果中
  43. if row_data:
  44. result.append(row_data)
  45. print(len(result))
  46. return result
  47. def write_data_to_file(input_directory,output_file,only_ask=None):
  48. # 解析 Excel 文件并写入数据到文件
  49. excel_data = parse_excel_files(input_directory,only_ask)
  50. with open(output_file, 'w', encoding='utf-8') as f:
  51. # 遍历每行内容
  52. for row in excel_data:
  53. # 使用json.dumps将行数据转换为JSON格式的字符串
  54. json_row = json.dumps(row, ensure_ascii=False)
  55. # 写入文件并添加换行符
  56. f.write(json_row + '\n')
  57. def formatTjl(directory):
  58. result = []
  59. # 遍历指定目录下的所有文件和文件夹
  60. for root, dirs, files in os.walk(directory):
  61. for file in files:
  62. # 处理扩展名为 .xlsx 和 .xls 的文件
  63. if file.endswith('.xlsx') or file.endswith('.xls'):
  64. file_path = os.path.join(root, file)
  65. if file.endswith('.xls'):
  66. # 使用 xlrd 打开 Excel 文件
  67. workbook = xlrd.open_workbook(file_path)
  68. for sheet in workbook.sheets():
  69. print(sheet.name)
  70. for row_idx in range(sheet.nrows):
  71. row_data = {
  72. 'data': '',
  73. 'time': '',
  74. 'CLJYSSLL': '',
  75. 'SJTYJLY': '',
  76. }
  77. # 跳过表头
  78. if row_idx < 4:
  79. continue
  80. # 跳过空行
  81. if all(sheet.cell_value(row_idx, col_idx) == '' for col_idx in range(sheet.ncols)):
  82. continue
  83. # 遍历每一列
  84. for col_idx in range(sheet.ncols):
  85. # 获取表头
  86. header = sheet.cell_value(3, col_idx)
  87. cell_value = sheet.cell_value(row_idx, col_idx)
  88. if cell_value:
  89. if header == '日期':
  90. row_data['data'] = str(cell_value)
  91. elif header == '时间':
  92. row_data['time'] = str(cell_value)
  93. elif header == '除磷加药瞬时流量':
  94. row_data['CLJYSSLL'] = str(cell_value)
  95. elif header == '乙酸钠加药瞬时流量转换':
  96. row_data['SJTYJLY'] = str(cell_value)
  97. # 将有效行数据添加到结果中
  98. result.append(row_data)
  99. # 根据 data 和 time 进行排序
  100. def sort_key(row):
  101. return (row['data'])
  102. result.sort(key=sort_key)
  103. # print(len(result))
  104. output_file = './excel/ceshi.json'
  105. with open(output_file, 'w', encoding='utf-8') as f:
  106. for row in result:
  107. json_row = json.dumps(row, ensure_ascii=False)
  108. f.write(json_row + '\n')
  109. if __name__ == "__main__":
  110. # 指定目录路径和输出文件路径
  111. input_directory = '/Users/yushanghui/hongshantianping/git/dataTools/book/edit/'
  112. output_file = '/Users/yushanghui/hongshantianping/git/dataTools/book/json/edit.json'
  113. write_data_to_file(input_directory,output_file)