From 0bcae4ec72fce0a174b1b13e506edb6b7da26e5b Mon Sep 17 00:00:00 2001
From: thug0bin <thug0bin@users.noreply.git.0bin.in>
Date: Sun, 8 Mar 2026 18:08:19 +0900
Subject: [PATCH] =?UTF-8?q?feat(scripts):=20dosage=20=EC=88=9C=EC=84=9C=20?=
 =?UTF-8?q?=EB=A7=A4=EC=B9=AD=EC=9C=BC=EB=A1=9C=20=EC=B2=B4=EC=A4=91?=
 =?UTF-8?q?=EA=B5=AC=EA=B0=84=20=EC=9E=90=EB=8F=99=20=EB=A7=A4=ED=95=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

제품명에 사이즈 라벨이 없지만 dosage 컬럼으로 구분 가능한 제품
(하트웜 솔루션, 지마스터, 넥스포인트 등) 처리 추가.
- 고유 dosage 수 == 체중구간 수 일 때 오름차순 매칭
- 작은 용량 = 작은 체중 원칙 적용
- 결과: 146건 → 189건으로 커버리지 증가 (+43건)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 backend/scripts/fill_weight_from_dosage.py | 76 ++++++++++++++++++++--
 1 file changed, 72 insertions(+), 4 deletions(-)

diff --git a/backend/scripts/fill_weight_from_dosage.py b/backend/scripts/fill_weight_from_dosage.py
index 5f6e530..8e6f234 100644
--- a/backend/scripts/fill_weight_from_dosage.py
+++ b/backend/scripts/fill_weight_from_dosage.py
@@ -356,6 +356,7 @@ def main():
         'total_items': len(items),
         'updated': 0,
         'matched_by_name': 0,
+        'matched_by_dosage_order': 0,
         'matched_single': 0,
         'skipped_no_parse': 0,
         'skipped_livestock': 0,
@@ -402,6 +403,8 @@ def main():
                 if wr['size']:
                     size_to_weight[wr['size']] = (wr['min'], wr['max'])
 
+            # 먼저 제품명 라벨로 매칭 시도
+            unmatched_rows = []
             for row in apc_rows:
                 size = detect_size_from_product_name(row.product_name)
                 if size and size in size_to_weight:
@@ -412,10 +415,74 @@ def main():
                     if args.verbose:
                         print(f'  적용 (제품명 {size}): {row.product_name} → {wmin}~{wmax}kg')
                 else:
-                    stats['skipped_multi_no_label'] += 1
-                    if args.verbose:
-                        print(f'  SKIP (다중구간+라벨없음): {row.product_name} '
-                              f'(감지={size}, 가용={list(size_to_weight.keys())})')
+                    unmatched_rows.append(row)
+
+            # ── 제품명 매칭 실패한 것들 → dosage 순서 매칭 시도 ──
+            if unmatched_rows:
+                # dosage 값이 있는 APC만 추출 (NaN 제외)
+                rows_with_dosage = [r for r in unmatched_rows
+                                    if r.dosage and r.dosage != 'NaN']
+                rows_no_dosage = [r for r in unmatched_rows
+                                  if not r.dosage or r.dosage == 'NaN']
+
+                if rows_with_dosage and len(weight_ranges) >= 2:
+                    def dosage_sort_key(dosage_str):
+                        """Sort key: first number in dosage_str as a float (0 if none); accepts single digits such as '5mg'."""
+                        nums = re.findall(r'\d+(?:\.\d+)?', dosage_str)
+                        return float(nums[0]) if nums else 0
+
+                    # Collect the unique dosage values, sorted ascending by their first number
+                    unique_dosages = sorted(
+                        set(r.dosage for r in rows_with_dosage),
+                        key=dosage_sort_key
+                    )
+                    # 체중 구간도 min 기준 정렬 (이미 정렬됨)
+                    sorted_ranges = sorted(weight_ranges, key=lambda x: x['min'])
+
+                    if len(unique_dosages) == len(sorted_ranges):
+                        # 개수 일치 → 순서 매칭 (작은 용량 = 작은 체중)
+                        dosage_to_weight = {}
+                        for d, wr in zip(unique_dosages, sorted_ranges):
+                            dosage_to_weight[d] = (wr['min'], wr['max'])
+
+                        for row in rows_with_dosage:
+                            if row.dosage in dosage_to_weight:
+                                wmin, wmax = dosage_to_weight[row.dosage]
+                                updates.append((row.apc, wmin, wmax, row.product_name,
+                                                f'dosage순서→{wmin}~{wmax}'))
+                                stats['matched_by_dosage_order'] += 1
+                                stats['updated'] += 1
+                                if args.verbose:
+                                    print(f'  적용 (dosage순서): {row.product_name} '
+                                          f'dosage={row.dosage} → {wmin}~{wmax}kg')
+                            else:
+                                stats['skipped_multi_no_label'] += 1
+                                if args.verbose:
+                                    print(f'  SKIP (dosage매칭실패): {row.product_name}')
+
+                        # dosage 없는 APC (대표 품목 등)
+                        for row in rows_no_dosage:
+                            stats['skipped_multi_no_label'] += 1
+                            if args.verbose:
+                                print(f'  SKIP (다중구간+dosage없음): {row.product_name}')
+
+                        if args.verbose and dosage_to_weight:
+                            print(f'    dosage 매핑: {dict((d, f"{w[0]}~{w[1]}kg") for d, w in dosage_to_weight.items())}')
+                    else:
+                        # 개수 불일치 → SKIP
+                        for row in unmatched_rows:
+                            stats['skipped_multi_no_label'] += 1
+                            if args.verbose:
+                                print(f'  SKIP (dosage수≠구간수): {row.product_name} '
+                                      f'(dosage {len(unique_dosages)}종 vs 구간 {len(sorted_ranges)}개)')
+                else:
+                    # dosage 없는 APC만 남음
+                    for row in unmatched_rows:
+                        stats['skipped_multi_no_label'] += 1
+                        if args.verbose:
+                            print(f'  SKIP (다중구간+라벨없음): {row.product_name} '
+                                  f'(감지={detect_size_from_product_name(row.product_name)}, '
+                                  f'가용={list(size_to_weight.keys())})')
 
     # ── 결과 출력 ──
     print('\n' + '=' * 60)
@@ -425,6 +492,7 @@ def main():
     print(f'  업데이트할 APC:           {stats["updated"]}건')
     print(f'    - 단일구간 적용:        {stats["matched_single"]}건')
     print(f'    - 제품명 라벨 매칭:     {stats["matched_by_name"]}건')
+    print(f'    - dosage 순서 매칭:     {stats["matched_by_dosage_order"]}건')
     print(f'  SKIP - 파싱 불가:         {stats["skipped_no_parse"]}건')
     print(f'  SKIP - 축산용 (>60kg):    {stats["skipped_livestock"]}건')
     print(f'  SKIP - 다중구간+라벨없음: {stats["skipped_multi_no_label"]}건')