init.py 1.3 KB

1234567891011121314151617181920212223242526272829303132
  1. from jiexiExcel import write_data_to_file # 导入特定的函数
  2. from checkBad import beginCheckBadFun# excel文件夹路径
  3. from formatData import formatAlpaca# excel文件夹路径
  4. from merge import merge_json_files # 合并所有为一个训练脚本
  5. # import asyncio
  6. # 清洗数据函数
  7. def formatDataFun ():
  8. input_directory = '/Users/yushanghui/hongshantianping/git/dataTools/book/迭代修改/'
  9. # 把excel文件转成json文件 并保存到指定路径
  10. output_file = '/Users/yushanghui/hongshantianping/git/dataTools/book/json/diedaiEdit.json'
  11. # 通过checkBad.py 函数进行过滤指定输出文件夹
  12. out_file = '/Users/yushanghui/hongshantianping/git/dataTools/book/jsonOut/diedaiEdit'
  13. alpaca_file = '/Users/yushanghui/hongshantianping/git/dataTools/book/alpaca/diedaiEdit.json'
  14. write_data_to_file(input_directory,output_file)
  15. beginCheckBadFun(output_file,out_file)
  16. formatAlpaca(out_file,alpaca_file)
  17. async def mergedFun ():
  18. input_folder1 = './book/alpaca' # 输入文件夹路径
  19. output_file1 = './book/all/merged.json' # 输出文件名
  20. # 第一次执行 merge_json_files
  21. await merge_json_files(input_folder1, output_file1)
  22. if __name__ == "__main__":
  23. # asyncio.run(mergedFun())
  24. formatDataFun()