1234567891011121314151617181920212223242526272829303132 |
- from jiexiExcel import write_data_to_file # 导入特定的函数
- from checkBad import beginCheckBadFun# excel文件夹路径
- from formatData import formatAlpaca# excel文件夹路径
- from merge import merge_json_files # 合并所有为一个训练脚本
- # import asyncio
- # 清洗数据函数
def formatDataFun(
    *,
    input_directory='/Users/yushanghui/hongshantianping/git/dataTools/book/迭代修改/',
    output_file='/Users/yushanghui/hongshantianping/git/dataTools/book/json/diedaiEdit.json',
    out_file='/Users/yushanghui/hongshantianping/git/dataTools/book/jsonOut/diedaiEdit',
    alpaca_file='/Users/yushanghui/hongshantianping/git/dataTools/book/alpaca/diedaiEdit.json',
):
    """Run the three-step data-cleaning pipeline.

    The steps run strictly in order, each one receiving the path the previous
    step was given as its output:

    1. ``write_data_to_file(input_directory, output_file)``
    2. ``beginCheckBadFun(output_file, out_file)``
    3. ``formatAlpaca(out_file, alpaca_file)``

    Every path defaults to the location this function used before the paths
    became parameters, so a bare ``formatDataFun()`` call behaves as it always
    did. All parameters are keyword-only to keep four similar-looking strings
    from being passed in the wrong order.

    Args:
        input_directory: Source directory handed to ``write_data_to_file``.
            Presumably the folder holding the Excel workbooks -- confirm
            against jiexiExcel.py.
        output_file: Path handed to ``write_data_to_file`` as its destination
            and then to ``beginCheckBadFun`` as its input. Presumably the JSON
            produced from the Excel data -- confirm against jiexiExcel.py.
        out_file: Output location handed to ``beginCheckBadFun`` and then to
            ``formatAlpaca`` as its input. NOTE(review): the default has no
            file extension and the original comment called it an output
            folder -- confirm whether checkBad.py treats it as a directory
            or as a file-name stem.
        alpaca_file: Second argument of ``formatAlpaca``. Presumably the
            Alpaca-format output file -- confirm against formatData.py.
    """
    # Step 1: Excel -> JSON (per the original inline comment).
    write_data_to_file(input_directory, output_file)
    # Step 2: filter the JSON through the checkBad.py routine.
    beginCheckBadFun(output_file, out_file)
    # Step 3: hand the filtered result to the Alpaca formatter.
    formatAlpaca(out_file, alpaca_file)
async def mergedFun(
    *,
    input_folder='./book/alpaca',
    output_file='./book/all/merged.json',
):
    """Await ``merge_json_files`` for one input folder and one output file.

    Both paths default to the values this coroutine used before they became
    parameters, so ``mergedFun()`` with no arguments behaves as before. The
    defaults are relative paths, so where they point depends on the working
    directory of the process (unless ``merge_json_files`` resolves them some
    other way -- confirm against merge.py).

    ``merge_json_files`` is awaited, so it must return an awaitable. Its
    result is discarded and this coroutine returns ``None``.

    Args:
        input_folder: Folder passed as the first argument of
            ``merge_json_files`` (the input folder, per the original comment).
        output_file: Path passed as the second argument of
            ``merge_json_files`` (the output file name, per the original
            comment).
    """
    await merge_json_files(input_folder, output_file)
-
if __name__ == "__main__":
    # Script entry point: only the cleaning pipeline runs.
    # The merge step is currently disabled; to run it, import asyncio and
    # call asyncio.run(mergedFun()).
    formatDataFun()
-
|