feat(animal-chat): APC 코드 2024년 체계 지원 및 피부약 2단계 추천

## APC 코드 체계 확장
- 기존: 023%만 검색 (~2023년 제품만)
- 변경: 02% OR 92% + 13자리 검증
  - 02%: 2023년 이전 item_seq (9자리) 기반 APC
  - 92%: 2024년 이후 item_seq (10자리) 기반 APC
  - 999% 등 청구프로그램 임의코드는 제외

## 동물약 챗봇 피부약 추천 개선
- 피부약 2단계 추천 구조 추가
  - 1차(치료): 의약품 (개시딘겔, 테르비덤 등)
  - 2차(보조케어): 의약외품 (스킨카솔 - 회복기 피부보호)
- 스킨카솔은 의약외품임을 명시하여 치료제로 오인 방지

## 기타
- RAG 테스트 스크립트 추가
- 수인약품 API 문서화
2026-03-11 14:17:04 +09:00
parent e470deaefc
commit 83ecf88bd4
21 changed files with 724 additions and 76 deletions
--- a/backend/utils/animal_rag.py
+++ b/backend/utils/animal_rag.py
@@ -106,12 +106,56 @@ class AnimalDrugRAG:
        
        return embeddings
    
+    def _extract_product_info(self, content: str) -> Dict[str, str]:
+        """
+        Extract product metadata from markdown text; missing fields stay "".
+        - product_name: first "# " heading line, cut before the first dash
+        - ingredients: text after a "> 성분" marker (at most 100 characters)
+        - target_animal: table cell following a bold "대상 동물" label
+        """
+        info = {"product_name": "", "ingredients": "", "target_animal": ""}
+        
+        # Product name from the "# " title line (e.g. "# 복합 개시딘 겔 - 표면성...")
+        title_match = re.search(r'^# (.+?)(?:\s*[-–—]|$)', content, re.MULTILINE)
+        if title_match:
+            info["product_name"] = title_match.group(1).strip()
+        
+        # Ingredients from a "> 성분:" blockquote line
+        ingredient_match = re.search(r'>\s*성분[:\s]+(.+?)(?:\n|$)', content)
+        if ingredient_match:
+            info["ingredients"] = ingredient_match.group(1).strip()[:100]  # cap at 100 characters
+        
+        # Target animal from the table cell after the bold label
+        animal_match = re.search(r'\*\*대상\s*동물\*\*[^\|]*\|\s*([^\|]+)', content)
+        if animal_match:
+            info["target_animal"] = animal_match.group(1).strip()
+        
+        return info
+    
+    def _make_chunk_prefix(self, product_info: Dict[str, str]) -> str:
+        """Build the chunk prefix "[제품명: .. | 대상: .. | 성분: ..]" followed by a blank line; empty string if no field is set."""
+        parts = []
+        if product_info["product_name"]:
+            parts.append(f"제품명: {product_info['product_name']}")
+        if product_info["target_animal"]:
+            parts.append(f"대상: {product_info['target_animal']}")
+        if product_info["ingredients"]:
+            parts.append(f"성분: {product_info['ingredients']}")
+        
+        if parts:
+            return "[" + " | ".join(parts) + "]\n\n"
+        return ""
+    
    def chunk_markdown(self, content: str, source_file: str) -> List[Dict]:
        """
-        마크다운 청킹 (섹션 기반)
+        마크다운 청킹 (섹션 기반 + 제품명 prefix)
        """
        chunks = []
        
+        # 제품 정보 추출 & prefix 생성
+        product_info = self._extract_product_info(content)
+        prefix = self._make_chunk_prefix(product_info)
+        
        # ## 헤더 기준 분리
        sections = re.split(r'\n(?=## )', content)
        
@@ -123,26 +167,34 @@ class AnimalDrugRAG:
            title_match = re.match(r'^## (.+?)(?:\n|$)', section)
            section_title = title_match.group(1).strip() if title_match else f"섹션{i+1}"
            
+            # prefix + section 결합
+            prefixed_section = prefix + section
+            
            # 큰 섹션은 추가 분할
-            if len(section) > CHUNK_SIZE:
-                sub_chunks = self._split_by_size(section, CHUNK_SIZE, CHUNK_OVERLAP)
+            if len(prefixed_section) > CHUNK_SIZE:
+                sub_chunks = self._split_by_size(prefixed_section, CHUNK_SIZE, CHUNK_OVERLAP)
                for j, sub_chunk in enumerate(sub_chunks):
+                    # 분할된 청크에도 prefix 보장 (overlap으로 잘렸을 경우)
+                    if j > 0 and not sub_chunk.startswith("["):
+                        sub_chunk = prefix + sub_chunk
                    chunk_id = f"{source_file}#{section_title}#{j}"
                    chunks.append({
                        "id": chunk_id,
                        "text": sub_chunk,
                        "source": source_file,
                        "section": section_title,
-                        "chunk_index": j
+                        "chunk_index": j,
+                        "product_name": product_info["product_name"]
                    })
            else:
                chunk_id = f"{source_file}#{section_title}"
                chunks.append({
                    "id": chunk_id,
-                    "text": section,
+                    "text": prefixed_section,
                    "source": source_file,
                    "section": section_title,
-                    "chunk_index": 0
+                    "chunk_index": 0,
+                    "product_name": product_info["product_name"]
                })
        
        return chunks
@@ -215,6 +267,7 @@ class AnimalDrugRAG:
                "source": chunk["source"],
                "section": chunk["section"],
                "chunk_index": chunk["chunk_index"],
+                "product_name": chunk.get("product_name", ""),
                "vector": emb
            })
        
@@ -224,7 +277,7 @@ class AnimalDrugRAG:
        
        return len(data)
    
-    def search(self, query: str, n_results: int = 3) -> List[Dict]:
+    def search(self, query: str, n_results: int = 5) -> List[Dict]:
        """
        유사도 검색
        """
@@ -248,9 +301,9 @@ class AnimalDrugRAG:
                distance = r.get("_distance", 10)
                score = 1 / (1 + distance)  # 0~1 범위로 변환
                
-                # 임계값: 유사도 0.3 미만은 제외 (관련 없는 문서)
-                # L2 거리 2.33 이상이면 제외
-                if score < 0.3:
+                # 임계값: 유사도 0.2 미만은 제외 (관련 없는 문서)
+                # L2 거리 4.0 이상이면 제외
+                if score < 0.2:
                    continue
                    
                output.append({