diff --git a/backend/scripts/fill_weight_from_dosage.py b/backend/scripts/fill_weight_from_dosage.py index 5f6e530..8e6f234 100644 --- a/backend/scripts/fill_weight_from_dosage.py +++ b/backend/scripts/fill_weight_from_dosage.py @@ -356,6 +356,7 @@ def main(): 'total_items': len(items), 'updated': 0, 'matched_by_name': 0, + 'matched_by_dosage_order': 0, 'matched_single': 0, 'skipped_no_parse': 0, 'skipped_livestock': 0, @@ -402,6 +403,8 @@ def main(): if wr['size']: size_to_weight[wr['size']] = (wr['min'], wr['max']) + # First, try matching by product-name label + unmatched_rows = [] for row in apc_rows: size = detect_size_from_product_name(row.product_name) if size and size in size_to_weight: @@ -412,10 +415,74 @@ def main(): if args.verbose: print(f' 적용 (제품명 {size}): {row.product_name} → {wmin}~{wmax}kg') else: - stats['skipped_multi_no_label'] += 1 - if args.verbose: - print(f' SKIP (다중구간+라벨없음): {row.product_name} ' - f'(감지={size}, 가용={list(size_to_weight.keys())})') + unmatched_rows.append(row) + + # ── Rows that failed product-name matching → try dosage-order matching ── + if unmatched_rows: + # Keep only APCs that have a dosage value (excluding NaN) + rows_with_dosage = [r for r in unmatched_rows + if r.dosage and r.dosage != 'NaN'] + rows_no_dosage = [r for r in unmatched_rows + if not r.dosage or r.dosage == 'NaN'] + + if rows_with_dosage and len(weight_ranges) >= 2: + # Sort key: first number in the dosage string (integer or decimal, including single digits); 0 if none + def dosage_sort_key(dosage_str): + nums = re.findall(r'(\d+(?:\.\d+)?)', dosage_str) + return float(nums[0]) if nums else 0 + + # Unique dosage values, sorted ascending by their first number + unique_dosages = sorted( + set(r.dosage for r in rows_with_dosage), + key=dosage_sort_key + ) + # Sort weight ranges by min as well (presumably already sorted upstream) + sorted_ranges = sorted(weight_ranges, key=lambda x: x['min']) + + if len(unique_dosages) == len(sorted_ranges): + # Counts match → pair in order (smaller dosage = lighter weight range) + dosage_to_weight = {} + for d, wr in zip(unique_dosages, sorted_ranges): + dosage_to_weight[d] = (wr['min'], wr['max']) + + for row in rows_with_dosage: + if row.dosage in dosage_to_weight: + wmin, wmax = 
dosage_to_weight[row.dosage] + updates.append((row.apc, wmin, wmax, row.product_name, + f'dosage순서→{wmin}~{wmax}')) + stats['matched_by_dosage_order'] += 1 + stats['updated'] += 1 + if args.verbose: + print(f' 적용 (dosage순서): {row.product_name} ' + f'dosage={row.dosage} → {wmin}~{wmax}kg') + else: + stats['skipped_multi_no_label'] += 1 + if args.verbose: + print(f' SKIP (dosage매칭실패): {row.product_name}') + + # APCs without a dosage (e.g. representative items) + for row in rows_no_dosage: + stats['skipped_multi_no_label'] += 1 + if args.verbose: + print(f' SKIP (다중구간+dosage없음): {row.product_name}') + + if args.verbose and dosage_to_weight: + print(f' dosage 매핑: {dict((d, f"{w[0]}~{w[1]}kg") for d, w in dosage_to_weight.items())}') + else: + # Count mismatch → SKIP + for row in unmatched_rows: + stats['skipped_multi_no_label'] += 1 + if args.verbose: + print(f' SKIP (dosage수≠구간수): {row.product_name} ' + f'(dosage {len(unique_dosages)}종 vs 구간 {len(sorted_ranges)}개)') + else: + # No usable dosage rows (or fewer than 2 weight ranges) → SKIP + for row in unmatched_rows: + stats['skipped_multi_no_label'] += 1 + if args.verbose: + print(f' SKIP (다중구간+라벨없음): {row.product_name} ' + f'(감지={detect_size_from_product_name(row.product_name)}, ' + f'가용={list(size_to_weight.keys())})') # ── 결과 출력 ── print('\n' + '=' * 60) @@ -425,6 +492,7 @@ def main(): print(f' 업데이트할 APC: {stats["updated"]}건') print(f' - 단일구간 적용: {stats["matched_single"]}건') print(f' - 제품명 라벨 매칭: {stats["matched_by_name"]}건') + print(f' - dosage 순서 매칭: {stats["matched_by_dosage_order"]}건') print(f' SKIP - 파싱 불가: {stats["skipped_no_parse"]}건') print(f' SKIP - 축산용 (>60kg): {stats["skipped_livestock"]}건') print(f' SKIP - 다중구간+라벨없음: {stats["skipped_multi_no_label"]}건')