"""Convert an annotated Excel workbook into conversation-style JSON files.

Every sheet row becomes a record of the form
``{"system": <str>, "conversations": [{"from": "human"|"gpt", "value": <str>}, ...]}``.
Records are written once per sheet and once more as a single combined file.
"""
import json
import os
import random
import re

import pandas as pd

# Matches "当前(的)值是/为" followed by an integer or decimal number.
# Group 1 is the prefix text, group 2 is the number that gets perturbed.
_CURRENT_VALUE_PATTERN = re.compile(r'(当前(?:的)?值(?:是|为)\s*)(\d+(\.\d+)?)')

# Matches the literal "{{export}}" marker that is stripped from cell text.
_EXPORT_MARKER_PATTERN = re.compile(r'{{(?:export)}}')


def replace_numbers(text: str) -> str:
    """Randomly perturb every number that follows a "current value" phrase.

    The size of the random offset depends on how the number is written:
    decimals and single-digit integers get a small offset, two-digit
    integers up to +/-1, anything longer up to +/-2. The perturbed number is
    always rendered with two decimal places.

    Args:
        text: Text to scan.

    Returns:
        ``text`` with each matched number replaced; text without a match is
        returned unchanged.
    """

    def _jitter(match):
        num_str = match.group(2)
        if '.' in num_str:
            # NOTE(review): this range is asymmetric, unlike the others --
            # confirm it is intentional.
            low, high = -0.001, 0.05
        elif len(num_str) == 1:
            low, high = -0.05, 0.05
        elif len(num_str) == 2:
            low, high = -1.00, 1.00
        else:
            low, high = -2, 2
        offset = round(random.uniform(low, high), 2)
        # Re-attach the untouched prefix in front of the randomized number.
        return match.group(1) + '{:.2f}'.format(float(num_str) + offset)

    return _CURRENT_VALUE_PATTERN.sub(_jitter, text)


def _parse_check(row):
    """Return the mapping stored in the row's ``check`` cell, or ``{}``."""
    raw = row.get('check')
    if raw is None or pd.isna(raw):
        return {}
    # NOTE(security): eval() executes arbitrary code taken from the
    # spreadsheet. Only run this on trusted workbooks; if the cell always
    # holds a plain literal, ast.literal_eval would be the safe replacement.
    return eval(raw)


def _row_to_record(row):
    """Build ``(system, conversations)`` from one sheet row.

    The ``check`` cell is read first so that the ``system`` template can be
    filled in regardless of the order of the columns in the sheet.
    """
    check_value = _parse_check(row)
    system = ''
    conversations = []
    for col_name, cell_value in row.items():
        if col_name == 'check' or pd.isna(cell_value):
            continue
        text = _EXPORT_MARKER_PATTERN.sub("", str(cell_value).strip()).strip()
        # Headers are not guaranteed to be strings (e.g. numeric headers).
        name = str(col_name)
        if name == 'system':
            system = replace_numbers(text).format_map(check_value)
        elif name.startswith('问'):
            conversations.append({"from": "human", "value": text})
        elif name.startswith('答'):
            conversations.append({"from": "gpt", "value": text})
    return system, conversations


def excel_to_json(excel_file_path: str, repeats: int = 12) -> tuple:
    """Read every sheet of a workbook and turn its rows into records.

    Each row is emitted ``repeats`` times. ``replace_numbers`` is randomized,
    so the repeated records may differ in the numbers of their system text.
    Rows that yield no conversation turns are skipped.

    Args:
        excel_file_path: Path of the Excel workbook to read.
        repeats: How many passes to make over the rows of each sheet.

    Returns:
        A ``(json_data, all_data)`` tuple: ``json_data`` maps each sheet name
        to its list of records, ``all_data`` is the list of records of all
        sheets combined.
    """
    json_data = {}
    all_data = []
    # The context manager closes the workbook handle once all sheets are read.
    with pd.ExcelFile(excel_file_path) as excel_data:
        for sheet_name in excel_data.sheet_names:
            df_sheet = pd.read_excel(excel_data, sheet_name=sheet_name)
            sheet_data = []
            for _ in range(repeats):
                for _, row in df_sheet.iterrows():
                    system, conversations = _row_to_record(row)
                    if not conversations:
                        continue
                    sheet_data.append(
                        {"system": system, "conversations": conversations}
                    )
                    all_data.append(
                        {"system": system, "conversations": conversations}
                    )
            json_data[sheet_name] = sheet_data
    print(len(all_data))
    return json_data, all_data


def save_json_per_sheet(json_data, output_folder):
    """Write each sheet's records to ``<output_folder>/<sheet_name>.json``.

    The output folder is created if it does not exist yet.
    """
    os.makedirs(output_folder, exist_ok=True)
    for sheet_name, sheet_data in json_data.items():
        output_file = os.path.join(output_folder, f'{sheet_name}.json')
        with open(output_file, 'w', encoding='utf-8') as f_out:
            json.dump(sheet_data, f_out, ensure_ascii=False, indent=4)


def save_json_all(all_data, output_file):
    """Write the combined records to ``output_file`` as UTF-8 JSON.

    The parent directory of ``output_file`` is created if it is missing.
    """
    parent = os.path.dirname(output_file)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(output_file, 'w', encoding='utf-8') as f_out:
        json.dump(all_data, f_out, ensure_ascii=False, indent=4)


# Path of the Excel workbook to convert.
excel_file_path = './excel/决策标注模版4.0_demo.xlsx'
# Folder that receives one JSON file per sheet.
output_folder = './excel/demo'
# File that receives the records of all sheets combined.
output_file = './public/jueceDemo.json'

if __name__ == "__main__":
    # Make sure the per-sheet output folder exists before writing to it.
    os.makedirs(output_folder, exist_ok=True)
    # Convert the workbook into JSON-ready data.
    json_data, all_data = excel_to_json(excel_file_path)
    # Save each sheet's records as its own JSON file.
    save_json_per_sheet(json_data, output_folder)
    # Save the combined records.
    save_json_all(all_data, output_file)