feat: yakkok.com 제품 이미지 크롤러 + 어드민 페이지

크롤러 (utils/yakkok_crawler.py):
- yakkok.com에서 제품 검색 및 이미지 추출
- MSSQL 오늘 판매 품목 자동 조회
- base64 변환 후 SQLite 저장
- CLI 지원 (--today, --product, --visible)

DB (product_images.db):
- 바코드, 제품명, 이미지(base64), 상태 저장
- 크롤링 로그 테이블

어드민 페이지 (/admin/product-images):
- 이미지 목록/검색/필터
- 통계 (성공/실패/대기)
- 상세 보기/삭제
- 오늘 판매 제품 일괄 크롤링

API:
- GET /api/admin/product-images
- GET /api/admin/product-images/stats
- GET /api/admin/product-images/<barcode>
- POST /api/admin/product-images/crawl-today
- POST /api/admin/product-images/crawl
- DELETE /api/admin/product-images/<barcode>
This commit is contained in:
thug0bin 2026-03-02 23:19:52 +09:00
parent 4713395557
commit 29648e3a7d
6 changed files with 1648 additions and 0 deletions

View File

@ -5649,6 +5649,169 @@ def api_search_mssql_drug():
return jsonify({'success': False, 'error': str(e)}), 500
# ============================================================
# 제품 이미지 관리 (yakkok 크롤러)
# ============================================================
@app.route('/admin/product-images')
def admin_product_images():
    """Serve the admin page used to manage crawled product images."""
    template_name = 'admin_product_images.html'
    return render_template(template_name)
@app.route('/api/admin/product-images')
def api_product_images_list():
    """Return a filtered, paginated list of product images as JSON.

    Query parameters:
        status: exact match on the ``status`` column (optional).
        search: substring matched with SQL ``LIKE`` against ``product_name``
            and ``barcode`` (optional).
        limit: page size, default 50.
        offset: number of rows to skip, default 0.

    Responds with ``{'success': True, 'total': <int>, 'items': [...]}`` where
    ``total`` is the number of rows matching the filters (ignoring paging),
    or ``{'success': False, 'error': <str>}`` with HTTP 500 on failure.
    """
    import sqlite3
    from contextlib import closing
    try:
        db_path = os.path.join(os.path.dirname(__file__), 'db', 'product_images.db')

        status_filter = request.args.get('status', '')
        search = request.args.get('search', '')
        # type=int makes a non-numeric value fall back to the default instead
        # of raising ValueError and turning a bad query string into a 500.
        limit = request.args.get('limit', 50, type=int)
        offset = request.args.get('offset', 0, type=int)

        where_clauses = []
        params = []
        if status_filter:
            where_clauses.append("status = ?")
            params.append(status_filter)
        if search:
            where_clauses.append("(product_name LIKE ? OR barcode LIKE ?)")
            params.extend([f'%{search}%', f'%{search}%'])
        # Only fixed SQL fragments are concatenated here; every user-supplied
        # value travels through bound parameters.
        where_sql = " WHERE " + " AND ".join(where_clauses) if where_clauses else ""

        # closing() releases the connection even when one of the queries raises.
        with closing(sqlite3.connect(db_path)) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            # Total number of matching rows
            cursor.execute(f"SELECT COUNT(*) FROM product_images {where_sql}", params)
            total = cursor.fetchone()[0]

            # Requested page (thumbnail only; the full image is served by the detail route)
            cursor.execute(f"""
                SELECT id, barcode, drug_code, product_name, thumbnail_base64,
                       image_url, status, created_at, error_message
                FROM product_images
                {where_sql}
                ORDER BY created_at DESC
                LIMIT ? OFFSET ?
            """, params + [limit, offset])
            items = [dict(row) for row in cursor.fetchall()]

        return jsonify({
            'success': True,
            'total': total,
            'items': items
        })
    except Exception as e:
        logging.error(f"제품 이미지 목록 조회 오류: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/admin/product-images/<barcode>')
def api_product_image_detail(barcode):
    """Return every column of one product image row, including the full base64 image.

    Args:
        barcode: barcode taken from the URL path.

    Responds with ``{'success': True, 'image': {...}}``, HTTP 404 when no row
    has that barcode, or HTTP 500 with the error text on failure.
    """
    import sqlite3
    from contextlib import closing
    try:
        db_path = os.path.join(os.path.dirname(__file__), 'db', 'product_images.db')
        # closing() releases the connection even when the query raises.
        with closing(sqlite3.connect(db_path)) as conn:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM product_images WHERE barcode = ?", (barcode,)
            ).fetchone()

        if row:
            return jsonify({'success': True, 'image': dict(row)})
        return jsonify({'success': False, 'error': '이미지 없음'}), 404
    except Exception as e:
        # Logged like the list route so failures are visible server-side too.
        logging.error(f"제품 이미지 상세 조회 오류: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/admin/product-images/crawl-today', methods=['POST'])
def api_crawl_today():
    """Crawl images for the products sold today and return the crawl summary as JSON."""
    try:
        from utils.yakkok_crawler import crawl_today_sales as run_crawl
        payload = {'success': True, 'result': run_crawl(headless=True)}
        return jsonify(payload)
    except Exception as exc:
        logging.error(f"크롤링 오류: {exc}")
        failure = {'success': False, 'error': str(exc)}
        return jsonify(failure), 500
@app.route('/api/admin/product-images/crawl', methods=['POST'])
def api_crawl_products():
    """Crawl images for an explicit list of products.

    Expects a JSON body ``{"products": [[barcode, drug_code, product_name], ...]}``.

    Responds with ``{'success': True, 'result': <crawl summary>}``, HTTP 400
    when the body is missing, not an object, has no products or contains a
    malformed entry, and HTTP 500 when the crawl itself fails.
    """
    try:
        from utils.yakkok_crawler import crawl_products

        # silent=True yields None for a missing, non-JSON or unparsable body,
        # so a bad request is answered with 400 below instead of a 500.
        data = request.get_json(silent=True)
        products = data.get('products') if isinstance(data, dict) else None
        if not products:
            return jsonify({'success': False, 'error': '제품 목록 필요'}), 400

        # crawl_products unpacks every entry into exactly three values;
        # reject anything else up front rather than failing halfway through.
        well_formed = isinstance(products, list) and all(
            isinstance(item, (list, tuple)) and len(item) == 3 for item in products
        )
        if not well_formed:
            return jsonify({
                'success': False,
                'error': '제품 형식 오류: [barcode, drug_code, product_name]'
            }), 400

        result = crawl_products(products, headless=True)
        return jsonify({'success': True, 'result': result})
    except Exception as e:
        logging.error(f"크롤링 오류: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/admin/product-images/<barcode>', methods=['DELETE'])
def api_delete_product_image(barcode):
    """Delete the product image row with the given barcode.

    Args:
        barcode: barcode taken from the URL path.

    Responds with ``{'success': True, 'deleted': <rows removed>}``; ``deleted``
    is 0 when no row had that barcode. On failure responds with HTTP 500 and
    the error text.
    """
    import sqlite3
    from contextlib import closing
    try:
        db_path = os.path.join(os.path.dirname(__file__), 'db', 'product_images.db')
        # closing() releases the connection even when the statement raises.
        with closing(sqlite3.connect(db_path)) as conn:
            cursor = conn.execute("DELETE FROM product_images WHERE barcode = ?", (barcode,))
            conn.commit()
            deleted = cursor.rowcount

        return jsonify({'success': True, 'deleted': deleted})
    except Exception as e:
        # Logged like the list route so failures are visible server-side too.
        logging.error(f"제품 이미지 삭제 오류: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
@app.route('/api/admin/product-images/stats')
def api_product_images_stats():
    """Return the number of product image rows per status plus the overall total.

    Responds with ``{'success': True, 'total': <int>, 'stats': {status: count}}``,
    or HTTP 500 with the error text on failure.
    """
    import sqlite3
    from contextlib import closing
    try:
        db_path = os.path.join(os.path.dirname(__file__), 'db', 'product_images.db')
        # closing() releases the connection even when the query raises.
        with closing(sqlite3.connect(db_path)) as conn:
            rows = conn.execute("""
                SELECT status, COUNT(*) as count
                FROM product_images
                GROUP BY status
            """).fetchall()

        stats = {status: count for status, count in rows}
        # Every row belongs to exactly one status group, so the groups add up
        # to the table size; no second COUNT(*) round trip is needed.
        total = sum(stats.values())

        return jsonify({
            'success': True,
            'total': total,
            'stats': stats
        })
    except Exception as e:
        # Logged like the list route so failures are visible server-side too.
        logging.error(f"제품 이미지 통계 조회 오류: {e}")
        return jsonify({'success': False, 'error': str(e)}), 500
if __name__ == '__main__':
import os

View File

@ -0,0 +1,38 @@
-- product_images.db schema
-- Stores product images crawled from yakkok.com
CREATE TABLE IF NOT EXISTS product_images (
id INTEGER PRIMARY KEY AUTOINCREMENT,
barcode TEXT UNIQUE NOT NULL, -- barcode (unique key)
drug_code TEXT, -- PIT3000 DrugCode
product_name TEXT NOT NULL, -- product name
search_name TEXT, -- name that was used for the search
image_base64 TEXT, -- image (base64)
image_url TEXT, -- original URL
thumbnail_base64 TEXT, -- thumbnail (base64, small size)
source TEXT DEFAULT 'yakkok', -- where the image came from
status TEXT DEFAULT 'pending', -- pending/success/failed/manual/no_result
error_message TEXT, -- error message on failure
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Indexes
-- NOTE(review): barcode is declared UNIQUE above, which SQLite normally backs
-- with its own index, so idx_barcode may be redundant -- confirm before dropping.
CREATE INDEX IF NOT EXISTS idx_barcode ON product_images(barcode);
CREATE INDEX IF NOT EXISTS idx_status ON product_images(status);
CREATE INDEX IF NOT EXISTS idx_drug_code ON product_images(drug_code);
CREATE INDEX IF NOT EXISTS idx_created_at ON product_images(created_at);
-- Crawl log table (one row per crawl batch)
CREATE TABLE IF NOT EXISTS crawl_logs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
batch_id TEXT, -- batch ID
total_count INTEGER DEFAULT 0, -- total number of products
success_count INTEGER DEFAULT 0, -- number of successes
failed_count INTEGER DEFAULT 0, -- number of failures
skipped_count INTEGER DEFAULT 0, -- number skipped (already present)
started_at DATETIME,
finished_at DATETIME,
status TEXT DEFAULT 'running', -- running/completed/failed
error_message TEXT
);

View File

@ -0,0 +1,575 @@
<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>제품 이미지 관리 - yakkok 크롤러</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@400;500;600;700&display=swap" rel="stylesheet">
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: 'Noto Sans KR', sans-serif;
background: linear-gradient(135deg, #0f0f23 0%, #1a1a3e 50%, #2d1b4e 100%);
min-height: 100vh;
color: #e0e0e0;
}
.container {
max-width: 1400px;
margin: 0 auto;
padding: 20px;
}
header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 24px;
flex-wrap: wrap;
gap: 16px;
}
h1 {
font-size: 24px;
background: linear-gradient(135deg, #a855f7, #6366f1);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
}
.actions {
display: flex;
gap: 12px;
flex-wrap: wrap;
}
.btn {
padding: 10px 20px;
border: none;
border-radius: 8px;
font-size: 14px;
font-weight: 600;
cursor: pointer;
transition: all 0.3s;
}
.btn-primary {
background: linear-gradient(135deg, #8b5cf6, #6366f1);
color: white;
}
.btn-primary:hover {
transform: translateY(-2px);
box-shadow: 0 4px 15px rgba(139, 92, 246, 0.4);
}
.btn-secondary {
background: rgba(255,255,255,0.1);
color: #e0e0e0;
border: 1px solid rgba(255,255,255,0.2);
}
.btn-secondary:hover {
background: rgba(255,255,255,0.2);
}
.btn-danger {
background: linear-gradient(135deg, #ef4444, #dc2626);
color: white;
}
.stats-bar {
display: flex;
gap: 16px;
margin-bottom: 24px;
flex-wrap: wrap;
}
.stat-card {
background: rgba(255,255,255,0.05);
backdrop-filter: blur(10px);
border: 1px solid rgba(255,255,255,0.1);
border-radius: 12px;
padding: 16px 24px;
min-width: 120px;
}
.stat-card .value {
font-size: 28px;
font-weight: 700;
color: #a855f7;
}
.stat-card .label {
font-size: 12px;
color: #9ca3af;
margin-top: 4px;
}
.stat-card.success .value { color: #10b981; }
.stat-card.failed .value { color: #ef4444; }
.stat-card.pending .value { color: #f59e0b; }
.filters {
display: flex;
gap: 12px;
margin-bottom: 20px;
flex-wrap: wrap;
}
.search-box {
flex: 1;
min-width: 200px;
padding: 10px 16px;
background: rgba(255,255,255,0.05);
border: 1px solid rgba(255,255,255,0.1);
border-radius: 8px;
color: white;
font-size: 14px;
}
.search-box::placeholder { color: #6b7280; }
.filter-select {
padding: 10px 16px;
background: rgba(255,255,255,0.05);
border: 1px solid rgba(255,255,255,0.1);
border-radius: 8px;
color: white;
font-size: 14px;
}
.filter-select option { background: #1a1a3e; }
.image-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 16px;
}
.image-card {
background: rgba(255,255,255,0.05);
backdrop-filter: blur(10px);
border: 1px solid rgba(255,255,255,0.1);
border-radius: 12px;
overflow: hidden;
transition: all 0.3s;
}
.image-card:hover {
transform: translateY(-4px);
box-shadow: 0 8px 25px rgba(139, 92, 246, 0.2);
}
.image-card .thumb {
width: 100%;
height: 150px;
background: rgba(0,0,0,0.3);
display: flex;
align-items: center;
justify-content: center;
overflow: hidden;
}
.image-card .thumb img {
max-width: 100%;
max-height: 100%;
object-fit: contain;
}
.image-card .thumb.no-image {
color: #6b7280;
font-size: 12px;
}
.image-card .info {
padding: 12px;
}
.image-card .name {
font-size: 13px;
font-weight: 600;
color: #e0e0e0;
margin-bottom: 4px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.image-card .barcode {
font-size: 11px;
color: #9ca3af;
font-family: monospace;
}
.image-card .status {
display: inline-block;
padding: 2px 8px;
border-radius: 4px;
font-size: 10px;
font-weight: 600;
margin-top: 8px;
}
.status.success { background: rgba(16,185,129,0.2); color: #10b981; }
.status.failed { background: rgba(239,68,68,0.2); color: #ef4444; }
.status.pending { background: rgba(245,158,11,0.2); color: #f59e0b; }
.status.no_result { background: rgba(107,114,128,0.2); color: #9ca3af; }
.status.manual { background: rgba(59,130,246,0.2); color: #3b82f6; }
.image-card .actions {
display: flex;
gap: 8px;
margin-top: 8px;
}
.image-card .btn-sm {
padding: 4px 8px;
font-size: 11px;
border-radius: 4px;
}
.modal {
display: none;
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(0,0,0,0.8);
z-index: 1000;
align-items: center;
justify-content: center;
}
.modal.show { display: flex; }
.modal-content {
background: #1a1a3e;
border-radius: 16px;
padding: 24px;
max-width: 500px;
width: 90%;
max-height: 80vh;
overflow: auto;
}
.modal-content h3 {
margin-bottom: 16px;
color: #a855f7;
}
.modal-content img {
max-width: 100%;
border-radius: 8px;
margin-bottom: 16px;
}
.loading {
text-align: center;
padding: 40px;
color: #9ca3af;
}
.loading::after {
content: '';
display: inline-block;
width: 20px;
height: 20px;
border: 2px solid #a855f7;
border-top-color: transparent;
border-radius: 50%;
animation: spin 1s linear infinite;
margin-left: 10px;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.toast {
position: fixed;
bottom: 20px;
right: 20px;
padding: 12px 24px;
border-radius: 8px;
color: white;
font-weight: 500;
z-index: 2000;
animation: slideIn 0.3s ease;
}
.toast.success { background: linear-gradient(135deg, #10b981, #059669); }
.toast.error { background: linear-gradient(135deg, #ef4444, #dc2626); }
.toast.info { background: linear-gradient(135deg, #3b82f6, #2563eb); }
@keyframes slideIn {
from { transform: translateX(100%); opacity: 0; }
to { transform: translateX(0); opacity: 1; }
}
.empty-state {
text-align: center;
padding: 60px 20px;
color: #6b7280;
}
.empty-state .icon {
font-size: 48px;
margin-bottom: 16px;
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>🖼️ 제품 이미지 관리</h1>
<div class="actions">
<button class="btn btn-primary" onclick="crawlToday()">
🔄 오늘 판매 제품 크롤링
</button>
<a href="/admin" class="btn btn-secondary">← 어드민</a>
</div>
</header>
<div class="stats-bar" id="statsBar">
<div class="stat-card">
<div class="value" id="statTotal">-</div>
<div class="label">전체</div>
</div>
<div class="stat-card success">
<div class="value" id="statSuccess">-</div>
<div class="label">성공</div>
</div>
<div class="stat-card failed">
<div class="value" id="statFailed">-</div>
<div class="label">실패</div>
</div>
<div class="stat-card pending">
<div class="value" id="statPending">-</div>
<div class="label">대기</div>
</div>
</div>
<div class="filters">
<input type="text" class="search-box" id="searchBox"
placeholder="제품명 또는 바코드 검색..."
onkeyup="debounceSearch()">
<select class="filter-select" id="statusFilter" onchange="loadImages()">
<option value="">전체 상태</option>
<option value="success">성공</option>
<option value="failed">실패</option>
<option value="no_result">검색결과없음</option>
<option value="pending">대기</option>
<option value="manual">수동등록</option>
</select>
</div>
<div class="image-grid" id="imageGrid">
<div class="loading">이미지 로딩 중...</div>
</div>
</div>
<!-- 상세 모달 -->
<div class="modal" id="detailModal">
<div class="modal-content">
<h3 id="modalTitle">제품 상세</h3>
<img id="modalImage" src="" alt="">
<div id="modalInfo"></div>
<div style="margin-top: 16px; text-align: right;">
<button class="btn btn-secondary" onclick="closeModal()">닫기</button>
</div>
</div>
</div>
<script>
// Handle of the pending search timer; debounceSearch() resets it on every call.
let debounceTimer;
// Initial load: fetch the statistics and the image list once the DOM is ready.
document.addEventListener('DOMContentLoaded', () => {
loadStats();
loadImages();
});
// Fetch the per-status counts and write them into the four stat cards.
// The "failed" card shows failed + no_result. Errors are only logged to the console.
async function loadStats() {
    const setText = (id, value) => {
        document.getElementById(id).textContent = value;
    };
    try {
        const response = await fetch('/api/admin/product-images/stats');
        const payload = await response.json();
        if (!payload.success) {
            return;
        }
        setText('statTotal', payload.total || 0);
        setText('statSuccess', payload.stats.success || 0);
        setText('statFailed', (payload.stats.failed || 0) + (payload.stats.no_result || 0));
        setText('statPending', payload.stats.pending || 0);
    } catch (err) {
        console.error(err);
    }
}
// Fetch the image list for the current search text and status filter and
// render it as a grid of cards. Shows an empty state when nothing matches and
// an error state when the request or the API call fails.
async function loadImages() {
    // Product names, barcodes, statuses and error texts come from the database
    // and are inserted through innerHTML, so each one is HTML-escaped first.
    const esc = (value) => (value == null ? '' : String(value)).replace(
        /[&<>"']/g,
        (ch) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch])
    );

    const grid = document.getElementById('imageGrid');
    const search = document.getElementById('searchBox').value;
    const status = document.getElementById('statusFilter').value;
    grid.innerHTML = '<div class="loading">이미지 로딩 중...</div>';
    try {
        const params = new URLSearchParams();
        if (search) params.append('search', search);
        if (status) params.append('status', status);
        params.append('limit', 100);
        const res = await fetch(`/api/admin/product-images?${params}`);
        const data = await res.json();

        // An API failure is reported as an error, not as "no images registered".
        if (!data.success) {
            grid.innerHTML = `<div class="empty-state" style="grid-column: 1/-1;">오류: ${esc(data.error)}</div>`;
            return;
        }

        if (data.items.length > 0) {
            // The buttons read the barcode back from the card's data attribute
            // instead of embedding it in the inline handler, so a quote in a
            // barcode can neither break the handler nor inject script.
            grid.innerHTML = data.items.map(item => `
                <div class="image-card" data-barcode="${esc(item.barcode)}">
                    <div class="thumb ${item.thumbnail_base64 ? '' : 'no-image'}">
                        ${item.thumbnail_base64
                            ? `<img src="data:image/jpeg;base64,${esc(item.thumbnail_base64)}" alt="${esc(item.product_name)}">`
                            : '이미지 없음'}
                    </div>
                    <div class="info">
                        <div class="name" title="${esc(item.product_name)}">${esc(item.product_name)}</div>
                        <div class="barcode">${esc(item.barcode)}</div>
                        <span class="status ${esc(item.status)}">${esc(getStatusText(item.status))}</span>
                        <div class="actions">
                            <button class="btn btn-secondary btn-sm" onclick="viewDetail(this.closest('.image-card').dataset.barcode)">상세</button>
                            <button class="btn btn-danger btn-sm" onclick="deleteImage(this.closest('.image-card').dataset.barcode)">삭제</button>
                        </div>
                    </div>
                </div>
            `).join('');
        } else {
            grid.innerHTML = `
                <div class="empty-state" style="grid-column: 1/-1;">
                    <div class="icon">📷</div>
                    <div>등록된 이미지가 없습니다</div>
                    <div style="margin-top: 8px; font-size: 13px;">
                        "오늘 판매 제품 크롤링" 버튼을 눌러 시작하세요
                    </div>
                </div>
            `;
        }
    } catch (err) {
        grid.innerHTML = `<div class="empty-state" style="grid-column: 1/-1;">오류: ${esc(err.message)}</div>`;
    }
}
// Map a status code to its Korean display label; unknown codes are returned unchanged.
function getStatusText(status) {
    const labels = {
        'success': '성공',
        'failed': '실패',
        'pending': '대기',
        'no_result': '검색없음',
        'manual': '수동등록'
    };
    // Own-property check: a plain lookup would return inherited members such as
    // Object.prototype.constructor for a status named "constructor".
    return Object.prototype.hasOwnProperty.call(labels, status) ? labels[status] : status;
}
// Restart the 300 ms timer; the list reloads only once the timer runs out
// without another call.
function debounceSearch() {
    if (debounceTimer !== undefined) {
        clearTimeout(debounceTimer);
    }
    debounceTimer = setTimeout(() => {
        loadImages();
    }, 300);
}
// Ask for confirmation, trigger the crawl of today's sold products and report
// the success / failed / skipped counts in a toast; refreshes stats and list on success.
async function crawlToday() {
    const confirmed = confirm('오늘 판매된 제품 이미지를 크롤링합니다. 진행할까요?');
    if (!confirmed) {
        return;
    }
    showToast('크롤링 시작... 잠시 기다려주세요', 'info');
    try {
        const response = await fetch('/api/admin/product-images/crawl-today', { method: 'POST' });
        const payload = await response.json();
        if (!payload.success) {
            showToast(payload.error || '크롤링 실패', 'error');
            return;
        }
        const summary = payload.result;
        showToast(`완료! 성공: ${summary.success}, 실패: ${summary.failed}, 스킵: ${summary.skipped}`, 'success');
        loadStats();
        loadImages();
    } catch (err) {
        showToast('오류: ' + err.message, 'error');
    }
}
// Load one product image by barcode and show it in the detail modal.
// Shows an error toast when the request fails or the API reports a failure.
async function viewDetail(barcode) {
    // Values from the API are inserted through innerHTML, so each one is
    // HTML-escaped first.
    const esc = (value) => (value == null ? '' : String(value)).replace(
        /[&<>"']/g,
        (ch) => ({ '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' }[ch])
    );
    try {
        // The barcode is a path segment; encode it so "/", "?" or "#" cannot
        // change which URL is requested.
        const res = await fetch(`/api/admin/product-images/${encodeURIComponent(barcode)}`);
        const data = await res.json();
        if (!data.success) {
            showToast(data.error || '상세 조회 실패', 'error');
            return;
        }
        const img = data.image;
        document.getElementById('modalTitle').textContent = img.product_name;
        document.getElementById('modalImage').src = img.image_base64
            ? `data:image/jpeg;base64,${img.image_base64}`
            : '';
        document.getElementById('modalImage').style.display = img.image_base64 ? 'block' : 'none';

        // Only http(s) URLs become links; anything else (including a missing
        // URL) is shown as plain text so no "null" or javascript: link appears.
        let urlHtml = '-';
        if (img.image_url) {
            urlHtml = /^https?:\/\//i.test(img.image_url)
                ? `<a href="${esc(img.image_url)}" target="_blank" rel="noopener noreferrer" style="color:#a855f7;">${esc(img.image_url)}</a>`
                : esc(img.image_url);
        }

        document.getElementById('modalInfo').innerHTML = `
            <p><strong>바코드:</strong> ${esc(img.barcode)}</p>
            <p><strong>DrugCode:</strong> ${esc(img.drug_code || '-')}</p>
            <p><strong>검색어:</strong> ${esc(img.search_name || '-')}</p>
            <p><strong>상태:</strong> ${esc(getStatusText(img.status))}</p>
            <p><strong>원본 URL:</strong> ${urlHtml}</p>
            <p><strong>등록일:</strong> ${esc(img.created_at)}</p>
            ${img.error_message ? `<p><strong>에러:</strong> ${esc(img.error_message)}</p>` : ''}
        `;
        document.getElementById('detailModal').classList.add('show');
    } catch (err) {
        showToast('상세 조회 실패', 'error');
    }
}
// Hide the detail modal by dropping its "show" class.
function closeModal() {
    const modal = document.getElementById('detailModal');
    modal.classList.remove('show');
}
// Ask for confirmation, delete the image with the given barcode, then refresh
// the statistics and the list. Failures are reported in a toast.
async function deleteImage(barcode) {
    if (!confirm('이 이미지를 삭제할까요?')) return;
    try {
        // The barcode is a path segment; encode it so "/", "?" or "#" cannot
        // redirect the DELETE to a different URL.
        const res = await fetch(`/api/admin/product-images/${encodeURIComponent(barcode)}`, {
            method: 'DELETE'
        });
        const data = await res.json();
        if (data.success) {
            showToast('삭제 완료', 'success');
            loadStats();
            loadImages();
        } else {
            showToast(data.error || '삭제 실패', 'error');
        }
    } catch (err) {
        showToast('오류: ' + err.message, 'error');
    }
}
// Show a transient notification in the page corner; it is removed after 4 seconds.
// `type` selects the colour class: 'success', 'error' or 'info'.
function showToast(message, type = 'info') {
    const toastEl = document.createElement('div');
    toastEl.className = `toast ${type}`;
    toastEl.textContent = message;
    document.body.appendChild(toastEl);
    setTimeout(() => {
        toastEl.remove();
    }, 4000);
}
// Close the modal with the Escape key
document.addEventListener('keydown', (e) => {
if (e.key === 'Escape') closeModal();
});
// Close the modal when the click lands on the backdrop itself
// (the element with id "detailModal"), not on its content
document.getElementById('detailModal').addEventListener('click', (e) => {
if (e.target.id === 'detailModal') closeModal();
});
</script>
</body>
</html>

8
backend/test_pg.py Normal file
View File

@ -0,0 +1,8 @@
import os

from sqlalchemy import create_engine, text

# The connection URL (including user and password) is read from the
# environment so that credentials never live in source control.
# NOTE(review): the password that was previously committed in this file should
# be rotated, since it remains visible in the repository history.
pg_url = os.environ.get('APDB_PG_URL')
if not pg_url:
    raise SystemExit(
        'APDB_PG_URL 환경변수에 PostgreSQL 접속 URL을 설정하세요 '
        '(예: postgresql://USER:PASSWORD@HOST:5432/apdb_master)'
    )

pg_engine = create_engine(pg_url)
with pg_engine.connect() as conn:
    # The search pattern is passed as a bound parameter rather than being
    # embedded in the SQL text.
    result = conn.execute(
        text(
            "SELECT apc, product_name, company_name, main_ingredient "
            "FROM apc WHERE product_name LIKE :pattern LIMIT 20"
        ),
        {'pattern': '%아시엔로%'},
    )
    print('아시엔로 검색 결과:')
    for row in result:
        print(f' APC: {row[0]} | {row[1]} | {row[2]} | {row[3]}')

View File

@ -0,0 +1,349 @@
# -*- coding: utf-8 -*-
"""
yakkok.com 제품 이미지 크롤러
- 제품명으로 검색하여 이미지 URL 추출
- base64로 변환하여 SQLite에 저장
"""
import os
import sys
import sqlite3
import base64
import logging
import hashlib
import re
from datetime import datetime
from urllib.parse import quote
import requests
from PIL import Image
from io import BytesIO
# Playwright 동기 모드
from playwright.sync_api import sync_playwright
# Logging setup
# NOTE(review): this runs at import time, so importing the module from the web
# app also calls basicConfig — confirm that is intended.
logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')
logger = logging.getLogger(__name__)
# Path of the SQLite database, resolved relative to this file (../db/product_images.db)
DB_PATH = os.path.join(os.path.dirname(__file__), '..', 'db', 'product_images.db')
# yakkok.com settings; {query} in the search URL is filled with the URL-quoted search term
YAKKOK_BASE_URL = "https://yakkok.com"
YAKKOK_SEARCH_URL = "https://yakkok.com/search?q={query}"
def init_db():
    """Create the product-image tables and indexes if they do not exist yet.

    Opens (and thereby creates, if needed) the SQLite file at ``DB_PATH`` and
    runs ``../db/product_images_schema.sql`` against it. When the schema file
    is missing nothing is created and a warning is logged instead of the
    completion message.
    """
    from contextlib import closing

    schema_path = os.path.join(os.path.dirname(__file__), '..', 'db', 'product_images_schema.sql')
    schema_sql = None
    if os.path.exists(schema_path):
        with open(schema_path, 'r', encoding='utf-8') as f:
            schema_sql = f.read()

    # closing() releases the connection even when the schema script fails.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        if schema_sql is not None:
            conn.executescript(schema_sql)
        conn.commit()

    if schema_sql is None:
        # Do not report success when no schema was applied.
        logger.warning(f"[DB] 스키마 파일 없음: {schema_path}")
        return
    logger.info(f"[DB] 초기화 완료: {DB_PATH}")
def get_existing_barcodes():
    """Return the set of barcodes whose row has status 'success' or 'manual'."""
    connection = sqlite3.connect(DB_PATH)
    rows = connection.execute(
        "SELECT barcode FROM product_images WHERE status IN ('success', 'manual')"
    ).fetchall()
    connection.close()
    return {barcode for (barcode,) in rows}
def save_product_image(barcode, drug_code, product_name, search_name,
image_base64, image_url, thumbnail_base64=None,
status='success', error_message=None):
"""제품 이미지 저장"""
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
cursor.execute("""
INSERT OR REPLACE INTO product_images
(barcode, drug_code, product_name, search_name, image_base64, image_url,
thumbnail_base64, status, error_message, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (barcode, drug_code, product_name, search_name, image_base64, image_url,
thumbnail_base64, status, error_message, datetime.now().isoformat()))
conn.commit()
conn.close()
logger.info(f"[DB] 저장 완료: {product_name} ({barcode}) - {status}")
def download_image_as_base64(url, max_size=500):
    """Download an image and return it as a base64-encoded JPEG string.

    The image is flattened onto a white background when it has transparency,
    converted to RGB, shrunk (keeping the aspect ratio) so that its longest
    side is at most ``max_size`` pixels, and re-encoded as JPEG at quality 85.

    Args:
        url: image URL to fetch.
        max_size: maximum length in pixels of the longest side.

    Returns:
        The base64 text, or None when the download or the conversion fails
        (the error is logged).
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        img = Image.open(BytesIO(response.content))

        # Palette and grey+alpha images can carry transparency as well; bring
        # them to RGBA first so they are flattened onto white below instead of
        # having the transparent area turn black in a plain RGB conversion.
        if img.mode in ('LA', 'PA') or (img.mode == 'P' and 'transparency' in img.info):
            img = img.convert('RGBA')

        # RGBA -> RGB (JPEG has no alpha channel)
        if img.mode == 'RGBA':
            bg = Image.new('RGB', img.size, (255, 255, 255))
            bg.paste(img, mask=img.split()[3])
            img = bg
        elif img.mode != 'RGB':
            img = img.convert('RGB')

        # Resize, keeping the aspect ratio
        longest_side = max(img.size)
        if longest_side > max_size:
            ratio = max_size / longest_side
            # max(1, ...) keeps a very thin image from collapsing to a 0-pixel
            # side, which would make resize() fail.
            new_size = tuple(max(1, int(dim * ratio)) for dim in img.size)
            img = img.resize(new_size, Image.LANCZOS)

        # Encode as JPEG and convert to base64 text
        buffer = BytesIO()
        img.save(buffer, format='JPEG', quality=85)
        return base64.b64encode(buffer.getvalue()).decode('utf-8')
    except Exception as e:
        logger.error(f"[ERROR] 이미지 다운로드 실패: {url} - {e}")
        return None
def clean_product_name(name):
    """Reduce a product name to a plain search term.

    Removes parenthesised text, removes quantity tokens made of a number
    (optionally with a decimal part) followed by one of the units
    ml/mg/kg/g/정/캡슐/T/개/EA, replaces ``_ - / \\`` with spaces and collapses
    runs of whitespace.

    Args:
        name: raw product name; None or an empty string yields ``''``.

    Returns:
        The cleaned name, possibly empty.
    """
    if not name:
        return ''
    # Drop parenthesised content (dosage, pack size, ...)
    name = re.sub(r'\([^)]*\)', '', name)
    # Drop number+unit tokens (100ml, 0.5mg, ...). The optional decimal part
    # keeps "0.5mg" from leaving "0." behind, and the lookahead keeps a unit
    # letter from being cut out of a longer Latin word ("5 Tab", "5 gel").
    name = re.sub(
        r'\d+(?:\.\d+)?\s*(?:ml|mg|kg|g|정|캡슐|t|개|ea)(?![a-z])',
        '',
        name,
        flags=re.IGNORECASE,
    )
    # Turn separator characters into spaces
    name = re.sub(r'[_\-/\\]', ' ', name)
    # Collapse repeated whitespace
    name = re.sub(r'\s+', ' ', name).strip()
    return name
def search_yakkok(page, product_name):
    """Search yakkok.com for a product and return the first plausible image URL.

    Args:
        page: Playwright page used to load the search results.
        product_name: raw product name; it is cleaned with
            ``clean_product_name`` and the raw name is used when cleaning
            leaves nothing.

    Returns:
        A tuple ``(image_url, search_name)``. ``image_url`` is None when no
        candidate image is found or when the search fails (the error is logged).
    """
    # Bound before the try so the except branch can always return it, even
    # when cleaning the name is the step that raises.
    search_name = product_name
    try:
        search_name = clean_product_name(product_name) or product_name

        # Open the search page
        search_url = YAKKOK_SEARCH_URL.format(query=quote(search_name))
        page.goto(search_url, wait_until='networkidle', timeout=15000)

        # Short pause after the page has gone idle
        page.wait_for_timeout(1000)

        # Take the first <img alt=...> that does not look like site chrome
        for img in page.query_selector_all('img[alt]'):
            src = img.get_attribute('src')
            alt = img.get_attribute('alt') or ''

            # Skip logos and icons
            if not src or 'logo' in src.lower() or 'icon' in src.lower():
                continue
            # Skip images whose alt text is one of the known UI labels
            if alt in ['검색', '', '마이', '재고콕', '약콕인증', '뒤로가기']:
                continue

            # Protocol-relative URLs are completed with https
            if src.startswith('//'):
                return 'https:' + src, search_name
            if src.startswith('http'):
                return src, search_name

        return None, search_name
    except Exception as e:
        logger.error(f"[ERROR] 검색 실패: {product_name} - {e}")
        return None, search_name
def _crawl_single_product(page, barcode, drug_code, product_name):
    """Search, download and store the image for one product; return True on success."""
    image_url, search_name = search_yakkok(page, product_name)
    if not image_url:
        save_product_image(
            barcode=barcode,
            drug_code=drug_code,
            product_name=product_name,
            search_name=search_name,
            image_base64=None,
            image_url=None,
            status='no_result',
            error_message='검색 결과 없음'
        )
        return False

    image_base64 = download_image_as_base64(image_url)
    if not image_base64:
        save_product_image(
            barcode=barcode,
            drug_code=drug_code,
            product_name=product_name,
            search_name=search_name,
            image_base64=None,
            image_url=image_url,
            status='failed',
            error_message='이미지 다운로드 실패'
        )
        return False

    # The thumbnail is fetched only after the full image succeeded, so a
    # broken URL costs one request instead of two.
    thumbnail_base64 = download_image_as_base64(image_url, max_size=100)
    save_product_image(
        barcode=barcode,
        drug_code=drug_code,
        product_name=product_name,
        search_name=search_name,
        image_base64=image_base64,
        image_url=image_url,
        thumbnail_base64=thumbnail_base64,
        status='success'
    )
    return True


def crawl_products(products, headless=True):
    """Crawl and store images for the given products.

    Products whose barcode is already stored with status 'success' or 'manual'
    are skipped. Every other product is searched on yakkok.com and the outcome
    (success, no_result or failed) is saved to the database.

    Args:
        products: iterable of ``(barcode, drug_code, product_name)`` entries.
        headless: whether to launch the browser without a window.

    Returns:
        A dict with the keys ``total`` (products attempted), ``success``,
        ``failed`` (includes products with no search result) and ``skipped``.
    """
    init_db()
    existing = get_existing_barcodes()

    # Keep only products that are not stored yet
    to_crawl = [(b, d, n) for b, d, n in products if b not in existing]
    if not to_crawl:
        logger.info("[INFO] 크롤링할 새 제품이 없습니다.")
        return {'total': 0, 'success': 0, 'failed': 0, 'skipped': len(products)}

    skipped = len(products) - len(to_crawl)
    logger.info(f"[INFO] 크롤링 시작: {len(to_crawl)}개 (스킵: {skipped}개)")
    results = {'total': len(to_crawl), 'success': 0, 'failed': 0, 'skipped': skipped}

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=headless)
        try:
            context = browser.new_context(
                viewport={'width': 390, 'height': 844},  # mobile viewport
                user_agent='Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15'
            )
            page = context.new_page()

            for barcode, drug_code, product_name in to_crawl:
                try:
                    logger.info(f"[CRAWL] {product_name} ({barcode})")
                    if _crawl_single_product(page, barcode, drug_code, product_name):
                        results['success'] += 1
                    else:
                        results['failed'] += 1
                    # Delay between requests
                    page.wait_for_timeout(500)
                except Exception as e:
                    logger.error(f"[ERROR] {product_name}: {e}")
                    save_product_image(
                        barcode=barcode,
                        drug_code=drug_code,
                        product_name=product_name,
                        search_name=product_name,
                        image_base64=None,
                        image_url=None,
                        status='failed',
                        error_message=str(e)
                    )
                    results['failed'] += 1
        finally:
            # Close the browser explicitly even when an error escapes the loop
            # (for example when recording a failure itself raises).
            browser.close()

    logger.info(f"[DONE] 완료 - 성공: {results['success']}, 실패: {results['failed']}, 스킵: {results['skipped']}")
    return results
def get_today_sales_products():
    """Return the products sold today as ``(barcode, drug_code, product_name)`` tuples.

    Queries the MSSQL 'PM_PRES' session for SALE_SUB rows whose ``SL_NO_order``
    starts with today's date (YYYYMMDD). The barcode comes from CD_GOODS, or
    from CD_ITEM_UNIT_MEMBER when CD_GOODS has none; rows without any barcode
    are dropped.

    Returns:
        A list of tuples; an empty list when the query fails (the error is logged).
    """
    try:
        # Make the parent folder importable for the db package. The check
        # avoids adding one more duplicate sys.path entry on every call.
        parent_dir = os.path.join(os.path.dirname(__file__), '..')
        if parent_dir not in sys.path:
            sys.path.insert(0, parent_dir)
        from db.dbsetup import db_manager
        from sqlalchemy import text

        # NOTE(review): the session is not closed here — confirm whether
        # db_manager owns the session lifecycle.
        session = db_manager.get_session('PM_PRES')
        today = datetime.now().strftime('%Y%m%d')

        # Items sold today (duplicates removed)
        query = text("""
            SELECT DISTINCT
                COALESCE(NULLIF(G.Barcode, ''),
                    (SELECT TOP 1 CD_CD_BARCODE FROM PM_DRUG.dbo.CD_ITEM_UNIT_MEMBER WHERE DrugCode = S.DrugCode)
                ) AS barcode,
                S.DrugCode AS drug_code,
                ISNULL(G.GoodsName, '알수없음') AS product_name
            FROM SALE_SUB S
            LEFT JOIN PM_DRUG.dbo.CD_GOODS G ON S.DrugCode = G.DrugCode
            WHERE S.SL_NO_order LIKE :today_pattern
            AND S.DrugCode IS NOT NULL
        """)
        result = session.execute(query, {'today_pattern': f'{today}%'}).fetchall()

        # Keep only rows that have a barcode
        products = [(row[0], row[1], row[2]) for row in result if row[0]]
        logger.info(f"[MSSQL] 오늘 판매 품목: {len(products)}")
        return products
    except Exception as e:
        logger.error(f"[ERROR] MSSQL 조회 실패: {e}")
        return []
def crawl_today_sales(headless=True):
    """Crawl images for every product sold today and return the crawl summary.

    When no product is returned for today, a zeroed summary with a ``message``
    entry is returned and no crawl is started.
    """
    todays_products = get_today_sales_products()
    if todays_products:
        return crawl_products(todays_products, headless=headless)
    return {'total': 0, 'success': 0, 'failed': 0, 'skipped': 0, 'message': '오늘 판매 내역 없음'}
# Command-line entry point
if __name__ == '__main__':
    import argparse

    cli = argparse.ArgumentParser(description='yakkok.com 제품 이미지 크롤러')
    cli.add_argument('--today', action='store_true', help='오늘 판매 제품 크롤링')
    cli.add_argument('--product', type=str, help='특정 제품명으로 테스트')
    cli.add_argument('--visible', action='store_true', help='브라우저 표시')
    options = cli.parse_args()

    if options.today or options.product:
        # --today takes precedence; --product crawls a single test product
        if options.today:
            outcome = crawl_today_sales(headless=not options.visible)
        else:
            single_product = [('TEST001', 'TEST', options.product)]
            outcome = crawl_products(single_product, headless=not options.visible)
        print(f"\n결과: {outcome}")
    else:
        usage_lines = (
            "사용법:",
            " python yakkok_crawler.py --today # 오늘 판매 제품 크롤링",
            " python yakkok_crawler.py --product 타이레놀 # 특정 제품 테스트",
            " python yakkok_crawler.py --visible # 브라우저 표시",
        )
        for usage_line in usage_lines:
            print(usage_line)

View File

@ -0,0 +1,515 @@
<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<title>동물약 APC 매핑 가이드</title>
<style>
@import url('https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@400;500;600;700&display=swap');
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Noto Sans KR', -apple-system, BlinkMacSystemFont, sans-serif;
line-height: 1.8;
color: #1e293b;
max-width: 210mm;
margin: 0 auto;
padding: 20mm;
background: #fff;
}
h1 {
font-size: 28px;
font-weight: 700;
color: #6366f1;
margin: 40px 0 20px;
padding-bottom: 10px;
border-bottom: 3px solid #6366f1;
}
h2 {
font-size: 22px;
font-weight: 700;
color: #334155;
margin: 35px 0 15px;
padding-bottom: 8px;
border-bottom: 2px solid #e2e8f0;
}
h3 {
font-size: 18px;
font-weight: 600;
color: #475569;
margin: 25px 0 12px;
}
h4 {
font-size: 16px;
font-weight: 600;
color: #64748b;
margin: 20px 0 10px;
}
p {
margin: 12px 0;
text-align: justify;
}
blockquote {
background: linear-gradient(135deg, #f8fafc 0%, #f1f5f9 100%);
border-left: 4px solid #6366f1;
padding: 16px 20px;
margin: 20px 0;
border-radius: 0 8px 8px 0;
font-style: italic;
color: #475569;
}
code {
background: #f1f5f9;
padding: 2px 6px;
border-radius: 4px;
font-family: 'Consolas', 'Monaco', monospace;
font-size: 13px;
color: #dc2626;
}
pre {
background: #1e293b;
color: #e2e8f0;
padding: 20px;
border-radius: 12px;
overflow-x: auto;
margin: 20px 0;
font-size: 12px;
line-height: 1.6;
}
pre code {
background: none;
color: inherit;
padding: 0;
}
table {
width: 100%;
border-collapse: collapse;
margin: 20px 0;
font-size: 14px;
}
th {
background: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%);
color: #fff;
padding: 12px 16px;
text-align: left;
font-weight: 600;
}
td {
padding: 12px 16px;
border-bottom: 1px solid #e2e8f0;
}
tr:nth-child(even) {
background: #f8fafc;
}
ul, ol {
margin: 15px 0;
padding-left: 25px;
}
li {
margin: 8px 0;
}
hr {
border: none;
height: 2px;
background: linear-gradient(90deg, #6366f1, #8b5cf6, #ec4899);
margin: 40px 0;
border-radius: 2px;
}
strong {
color: #334155;
font-weight: 600;
}
em {
color: #64748b;
}
/* 첫 페이지 타이틀 */
h1:first-of-type {
font-size: 32px;
text-align: center;
border-bottom: none;
margin-top: 60px;
margin-bottom: 10px;
}
h1:first-of-type + blockquote {
text-align: center;
border-left: none;
background: none;
font-size: 18px;
margin-bottom: 60px;
}
/* 프린트 스타일 */
@media print {
body {
padding: 15mm;
}
pre {
white-space: pre-wrap;
word-wrap: break-word;
}
h1, h2, h3 {
page-break-after: avoid;
}
table, pre, blockquote {
page-break-inside: avoid;
}
}
/* 페이지 구분 */
.page-break {
page-break-before: always;
}
</style>
</head>
<body>
<h1 id="apc">동물약 APC 매핑 가이드</h1>
<blockquote>
<p>최종 업데이트: 2026-03-02</p>
</blockquote>
<h2 id="_1">개요</h2>
<p>POS(PIT3000)의 동물약 제품을 APDB의 APC 코드와 매핑하여 제품 정보(용법, 용량, 주의사항) 및 이미지를 표시하기 위한 작업 가이드.</p>
<hr />
<h2 id="_2">현재 상태</h2>
<h3 id="_3">매핑 현황</h3>
<table>
<thead>
<tr>
<th>구분</th>
<th>개수</th>
<th>비율</th>
</tr>
</thead>
<tbody>
<tr>
<td>동물약 총</td>
<td>39개</td>
<td>100%</td>
</tr>
<tr>
<td>APC 매핑됨</td>
<td>7개</td>
<td>18%</td>
</tr>
<tr>
<td><strong>APC 미매핑</strong></td>
<td><strong>32개</strong></td>
<td><strong>82%</strong></td>
</tr>
</tbody>
</table>
<h3 id="_4">매핑 완료 제품</h3>
<table>
<thead>
<tr>
<th>POS 제품명</th>
<th>DrugCode</th>
<th>APC</th>
</tr>
</thead>
<tbody>
<tr>
<td>(판)복합개시딘</td>
<td>LB000003140</td>
<td>0231093520106</td>
</tr>
<tr>
<td>안텔민킹(5kg이상)</td>
<td>LB000003158</td>
<td>0230237810109</td>
</tr>
<tr>
<td>안텔민뽀삐(5kg이하)</td>
<td>LB000003157</td>
<td>0230237010107</td>
</tr>
<tr>
<td>파라캅L(5kg이상)</td>
<td>LB000003159</td>
<td>0230338510101</td>
</tr>
<tr>
<td>파라캅S(5kg이하)</td>
<td>LB000003160</td>
<td>0230347110106</td>
</tr>
<tr>
<td>세레니아정16mg(개멀미약)</td>
<td>LB000003353</td>
<td>0231884610109</td>
</tr>
<tr>
<td>세레니아정24mg(개멀미약)</td>
<td>LB000003354</td>
<td>0231884620107</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="_5">매핑 구조</h2>
<h3 id="_6">데이터베이스 연결</h3>
<pre><code>MSSQL (192.168.0.4\PM2014) PostgreSQL (192.168.0.87:5432)
┌─────────────────────────┐ ┌─────────────────────────┐
│ PM_DRUG.CD_GOODS │ │ apdb_master.apc │
│ - DrugCode │ │ - apc (PK) │
│ - GoodsName │ │ - product_name │
│ - BARCODE │ │ - image_url1 │
│ │ │ - llm_pharm (JSONB) │
├─────────────────────────┤ └─────────────────────────┘
│ PM_DRUG.CD_ITEM_UNIT_ │
│ MEMBER │
│ - DRUGCODE (FK) │
│ - CD_CD_BARCODE ◀───────┼── APC 코드 저장 (023%로 시작)
│ - CHANGE_DATE │
└─────────────────────────┘
</code></pre>
<h3 id="apc_1">APC 매핑 방식</h3>
<ol>
<li><code>CD_ITEM_UNIT_MEMBER</code> 테이블에 <strong>추가 바코드</strong>로 APC 등록</li>
<li>기존 바코드는 유지, APC를 별도 레코드로 INSERT</li>
<li>APC 코드는 <code>023</code>으로 시작하며, 이 접두사로 기존 바코드와 구분 (조회 시 <code>LIKE '023%'</code>)</li>
</ol>
<hr />
<h2 id="11">1:1 매핑 가능 후보</h2>
<h3 id="1">✅ 확실한 매핑 (1개)</h3>
<table>
<thead>
<tr>
<th>POS 제품명</th>
<th>DrugCode</th>
<th>APC</th>
<th>APDB 제품명</th>
<th>이미지</th>
</tr>
</thead>
<tbody>
<tr>
<td><strong>제스타제(10정)</strong></td>
<td>LB000003146</td>
<td>8809720800455</td>
<td>제스타제</td>
<td>✅ 있음</td>
</tr>
</tbody>
</table>
<h3 id="1_1">⚠️ 검토 필요 (1개)</h3>
<table>
<thead>
<tr>
<th>POS 제품명</th>
<th>DrugCode</th>
<th>APC 후보</th>
<th>비고</th>
</tr>
</thead>
<tbody>
<tr>
<td>안텔민</td>
<td>S0000001</td>
<td>0230237800003</td>
<td>"안텔민킹"과 "안텔민뽀삐"는 이미 별도 매핑됨. 이 제품이 무엇인지 확인 필요</td>
</tr>
</tbody>
</table>
<h3 id="apdb-3">❌ APDB에 없음 (3개)</h3>
<table>
<thead>
<tr>
<th>POS 제품명</th>
<th>사유</th>
</tr>
</thead>
<tbody>
<tr>
<td>(판)클라펫정50(100정)</td>
<td>APDB엔 "클라펫 정"만 있음 (함량 불일치)</td>
</tr>
<tr>
<td>넥스가드xs(2~3.5kg)</td>
<td>사이즈별 APC 없음</td>
</tr>
<tr>
<td>캐치원캣(2.5~7.5kg)/고양이</td>
<td>APDB에 캐치원 자체가 없음</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="1n-27">1:N 매핑 필요 제품 (27개)</h2>
<p>사이즈별로 세분화된 제품들. 하나의 APDB APC에 여러 POS 제품을 매핑해야 함.</p>
<h3 id="_7">브랜드별 현황</h3>
<table>
<thead>
<tr>
<th>브랜드</th>
<th>POS 제품 수</th>
<th>APDB 존재</th>
<th>비고</th>
</tr>
</thead>
<tbody>
<tr>
<td>다이로하트</td>
<td>3개 (SS/S/M)</td>
<td>✅</td>
<td>다이로하트 츄어블 정</td>
</tr>
<tr>
<td>하트세이버</td>
<td>4개 (mini/S/M/L)</td>
<td>✅</td>
<td>하트세이버 플러스 츄어블</td>
</tr>
<tr>
<td>하트웜솔루션</td>
<td>2개 (S/M)</td>
<td>❌</td>
<td>APDB에 없음</td>
</tr>
<tr>
<td>리펠로</td>
<td>2개 (S/M)</td>
<td>✅</td>
<td>리펠로액 (이미지 있음!)</td>
</tr>
<tr>
<td>캐치원</td>
<td>5개 (SS/S/M/L/캣)</td>
<td>❌</td>
<td>APDB에 없음</td>
</tr>
<tr>
<td>셀라이트</td>
<td>5개 (SS/S/M/L/XL)</td>
<td>✅</td>
<td>셀라이트 액</td>
</tr>
<tr>
<td>넥스가드</td>
<td>2개 (xs/L)</td>
<td>✅</td>
<td>넥스가드 스펙트라</td>
</tr>
<tr>
<td>가드닐</td>
<td>3개 (S/M/L)</td>
<td>✅</td>
<td>가드닐 액</td>
</tr>
<tr>
<td>심피드</td>
<td>2개 (M/L)</td>
<td>❌</td>
<td>APDB에 없음</td>
</tr>
<tr>
<td>하트캅</td>
<td>1개</td>
<td>✅</td>
<td>하트캅-츄어블 정</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="apdb">APDB 통계</h2>
<table>
<thead>
<tr>
<th>항목</th>
<th>수치</th>
</tr>
</thead>
<tbody>
<tr>
<td>전체 APC</td>
<td>16,326개</td>
</tr>
<tr>
<td>이미지 있음</td>
<td>73개 (0.4%)</td>
</tr>
<tr>
<td>LLM 정보 있음</td>
<td>81개 (0.5%)</td>
</tr>
<tr>
<td>동물 관련 키워드</td>
<td>~200개</td>
</tr>
</tbody>
</table>
<p>⚠️ <strong>주의:</strong> APDB에 이미지가 거의 없음. 이미지 표시가 목적이라면 다른 소스 필요.</p>
<hr />
<h2 id="_8">매핑 스크립트</h2>
<h3 id="_9">매핑 후보 찾기</h3>
<pre><code class="language-bash">python backend/scripts/batch_apc_matching.py
</code></pre>
<h3 id="11_1">1:1 매핑 가능 후보 추출</h3>
<pre><code class="language-bash">python backend/scripts/find_1to1_candidates.py
</code></pre>
<h3 id="_10">매핑 실행 (수동)</h3>
<pre><code class="language-python"># backend/scripts/batch_insert_apc.py 참고
MAPPINGS = [
('제스타제(10정)', 'LB000003146', '8809720800455'),
]
</code></pre>
<h3 id="insert">INSERT 쿼리 예시</h3>
<pre><code class="language-sql">INSERT INTO CD_ITEM_UNIT_MEMBER (
DRUGCODE, CD_CD_UNIT, CD_NM_UNIT, CD_MY_UNIT, CD_IN_UNIT,
CD_CD_BARCODE, CD_CD_POS, CHANGE_DATE
) VALUES (
'LB000003146', -- DrugCode
'015', -- 단위코드
1.0, -- 단위명
&lt;기존값&gt;, -- CD_MY_UNIT (기존 레코드에서 복사)
&lt;기존값&gt;, -- CD_IN_UNIT (기존 레코드에서 복사)
'8809720800455', -- APC 바코드
    '',                 -- CD_CD_POS (빈 값)
'20260302' -- 변경일자
)
</code></pre>
<hr />
<h2 id="_11">다음 단계</h2>
<ol>
<li><strong>제스타제</strong> 1:1 매핑 실행</li>
<li><strong>안텔민(S0000001)</strong> 제품 확인 후 결정</li>
<li>1:N 매핑 정책 결정 (사이즈별 제품 → 동일 APC?)</li>
<li>이미지 소스 대안 검토 (필요시)</li>
</ol>
<hr />
<h2 id="_12">관련 파일</h2>
<ul>
<li><code>backend/db/dbsetup.py</code> - DB 연결 설정</li>
<li><code>backend/scripts/batch_apc_matching.py</code> - 매칭 후보 찾기</li>
<li><code>backend/scripts/batch_insert_apc.py</code> - 매핑 실행</li>
<li><code>backend/scripts/find_1to1_candidates.py</code> - 1:1 후보 추출</li>
<li><code>backend/app.py</code> - <code>_get_animal_drugs()</code>, <code>_get_animal_drug_rag()</code></li>
</ul>
</body>
</html>