feat: Excel 형식 자동 감지 및 다중 형식 지원 추가

✨ 새로운 기능 - 한의사랑, 한의정보 Excel 형식 자동 감지 - ExcelProcessor 클래스로 형식별 처리 로직 분리 - 각 형식에 맞는 컬럼 매핑 자동 적용 📊 지원하는 Excel 형식 1. 한의사랑 형식 - 품목명, 제품코드, 일그램당단가, 원산지 등 - 단가가 이미 계산된 형식 2. 한의정보 형식 - 제품코드, 업체명, 약재명, 구입일자 등 - 업체명이 포함된 형식 🔧 기술적 변경사항 - excel_processor.py 모듈 추가 - 형식 감지 및 검증 로직 구현 - 표준 형식으로 자동 변환 기능 - 업로드 응답에 상세 요약 정보 추가 ✅ 테스트 완료 - 한의사랑 형식 업로드 성공 - 한의정보 형식 업로드 성공 - 각 형식당 28종 약재, 88,000g 처리 확인 🤖 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com>
2026-02-15 08:15:59 +00:00 · 2026-02-15 08:15:59 +00:00 · 974000acaa
commit 974000acaa
parent 2fddc89bca
5 changed files with 528 additions and 16 deletions
--- a/analyze_excel_formats.py
+++ b/analyze_excel_formats.py
@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Excel 파일 형식 분석 도구
+한의사랑과 한의정보 형식 비교
+"""
+
+import pandas as pd
+import sys
+import os
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+def analyze_excel_format(file_path, format_name):
+    """Print a diagnostic report for one Excel workbook and return its data.
+
+    The report lists the row/column counts, the column names, each column's
+    dtype, the first three rows, per-column missing-value counts and
+    per-column distinct-value counts.
+
+    Args:
+        file_path: Path handed to ``pd.read_excel``.
+        format_name: Label shown in the report header.
+
+    Returns:
+        The loaded DataFrame, or ``None`` when reading or reporting raises
+        (the error text is printed instead of being propagated).
+    """
+    rule = '=' * 60
+    print("\n" + rule)
+    print(f"📊 {format_name} 형식 분석")
+    print(f"파일: {file_path}")
+    print(rule)
+
+    try:
+        frame = pd.read_excel(file_path)
+        column_names = frame.columns
+
+        # 1. Overall size
+        print(f"\n1️⃣ 기본 정보:")
+        print(f"   - 행 개수: {len(frame)}")
+        print(f"   - 열 개수: {len(column_names)}")
+
+        # 2. Column names, numbered from 1
+        print(f"\n2️⃣ 컬럼 목록:")
+        for position, name in enumerate(column_names, start=1):
+            print(f"   {position}. {name}")
+
+        # 3. dtype pandas inferred for each column
+        print(f"\n3️⃣ 데이터 타입:")
+        for name, dtype in frame.dtypes.items():
+            print(f"   - {name}: {dtype}")
+
+        # 4. First three rows as a preview
+        print(f"\n4️⃣ 샘플 데이터 (처음 3행):")
+        preview = frame.head(3)
+        print(preview.to_string(index=False))
+
+        # 5. Missing values: only columns that actually have gaps are listed
+        print(f"\n5️⃣ 누락 데이터:")
+        missing_per_column = frame.isnull().sum()
+        for name, missing in missing_per_column.items():
+            if missing > 0:
+                print(f"   - {name}: {missing}개 누락")
+        total_missing = missing_per_column.sum()
+        if total_missing == 0:
+            print("   - 누락 데이터 없음")
+
+        # 6. Distinct-value count per column (informational only)
+        print(f"\n6️⃣ 고유값 개수:")
+        for name in column_names:
+            distinct = frame[name].nunique()
+            print(f"   - {name}: {distinct}개")
+
+        return frame
+
+    except Exception as error:
+        # Best-effort tool: report the failure and let the caller carry on.
+        print(f"❌ 오류 발생: {error}")
+        return None
+
+def _first_matching_column(columns, keywords):
+    """Return the first column, in the given order, whose name contains a keyword.
+
+    Names are compared as lower-cased strings, so non-string headers
+    (numbers, timestamps) cannot raise. Returns ``None`` when nothing matches.
+    """
+    for column in columns:
+        lowered = str(column).lower()
+        if any(keyword in lowered for keyword in keywords):
+            return column
+    return None
+
+
+def compare_formats(df1, df2, name1, name2):
+    """Print a column-level comparison of two loaded workbooks.
+
+    Output consists of the shared columns, the columns unique to each side,
+    and a keyword-based guess at which columns play the same role (date,
+    herb name, quantity, amount, supplier, origin).
+
+    Role guesses pick the first matching column in each DataFrame's own
+    column order, so the output is the same on every run.
+
+    Args:
+        df1: First DataFrame, or ``None`` if it failed to load.
+        df2: Second DataFrame, or ``None`` if it failed to load.
+        name1: Display name for ``df1``.
+        name2: Display name for ``df2``.
+
+    Returns:
+        None. Everything is written to stdout.
+    """
+    print(f"\n{'='*60}")
+    print(f"🔄 {name1} vs {name2} 형식 비교")
+    print('='*60)
+
+    if df1 is None or df2 is None:
+        print("비교할 수 없습니다 (데이터 로드 실패)")
+        return
+
+    # Keep the original column order for deterministic candidate selection;
+    # the sets are only used for the overlap/difference arithmetic.
+    ordered1 = list(df1.columns)
+    ordered2 = list(df2.columns)
+    cols1 = set(ordered1)
+    cols2 = set(ordered2)
+
+    # Columns present on both sides (key=str keeps mixed-type headers sortable)
+    common = cols1 & cols2
+    print(f"\n✅ 공통 컬럼 ({len(common)}개):")
+    for col in sorted(common, key=str):
+        print(f"   - {col}")
+
+    # Columns only in the first workbook
+    only_in_1 = cols1 - cols2
+    if only_in_1:
+        print(f"\n📌 {name1}에만 있는 컬럼 ({len(only_in_1)}개):")
+        for col in sorted(only_in_1, key=str):
+            print(f"   - {col}")
+
+    # Columns only in the second workbook
+    only_in_2 = cols2 - cols1
+    if only_in_2:
+        print(f"\n📌 {name2}에만 있는 컬럼 ({len(only_in_2)}개):")
+        for col in sorted(only_in_2, key=str):
+            print(f"   - {col}")
+
+    # Suggested column pairings
+    print(f"\n🔗 컬럼 매핑 추천:")
+
+    # (label, substrings that mark a column as playing that role).
+    # '량' already covers '수량'/'구입량', '액' covers '금액', and '산지'
+    # covers '원산지', so the longer spellings are not listed separately.
+    role_keywords = [
+        ("날짜", ('일', '날짜', 'date')),
+        ("약재명", ('약재', '품목', '제품')),
+        ("수량", ('량',)),
+        ("금액", ('액', '가격')),
+        ("공급업체", ('업체', '도매', '공급')),
+        ("원산지", ('산지',)),
+    ]
+
+    for mapping_type, keywords in role_keywords:
+        col1 = _first_matching_column(ordered1, keywords)
+        col2 = _first_matching_column(ordered2, keywords)
+        # A pairing is only suggested when both sides have a candidate.
+        if col1 is not None and col2 is not None:
+            print(f"   - {mapping_type}: [{name1}]{col1} ↔ [{name2}]{col2}")
+
+def main():
+    """Analyse the bundled sample workbooks and print a mapping proposal.
+
+    Sample files are looked up in the ``sample`` directory next to this
+    script, so the tool works wherever the repository is checked out. A
+    missing 한의사랑/한의정보 file is reported and skipped; the optional
+    "currently used" export is analysed only when it exists.
+    """
+    print("\n" + "="*60)
+    print("🏥 한약 입고장 Excel 형식 분석기")
+    print("="*60)
+
+    # Sample file locations, resolved relative to this script
+    sample_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'sample')
+    hanisarang_path = os.path.join(sample_dir, '한의사랑.xlsx')
+    haninfo_path = os.path.join(sample_dir, '한의정보.xlsx')
+    current_path = os.path.join(sample_dir, 'order_view_20260215154829.xlsx')
+
+    # Analyse each format
+    df_hanisarang = None
+    df_haninfo = None
+
+    if os.path.exists(hanisarang_path):
+        df_hanisarang = analyze_excel_format(hanisarang_path, "한의사랑")
+    else:
+        print(f"❌ 한의사랑 파일을 찾을 수 없음: {hanisarang_path}")
+
+    if os.path.exists(haninfo_path):
+        df_haninfo = analyze_excel_format(haninfo_path, "한의정보")
+    else:
+        print(f"❌ 한의정보 파일을 찾을 수 없음: {haninfo_path}")
+
+    # The currently used export is printed for reference only; its data is
+    # not needed afterwards, so the return value is not kept.
+    if os.path.exists(current_path):
+        analyze_excel_format(current_path, "현재 사용 중")
+
+    # Compare the two vendor formats when both loaded
+    if df_hanisarang is not None and df_haninfo is not None:
+        compare_formats(df_hanisarang, df_haninfo, "한의사랑", "한의정보")
+
+    # Unified mapping proposal
+    print(f"\n{'='*60}")
+    print("💡 통합 컬럼 매핑 제안")
+    print('='*60)
+
+    print("""
+시스템에서 사용할 표준 컬럼:
+1. insurance_code (보험코드/제품코드)
+2. supplier_name (업체명/도매상)
+3. herb_name (약재명/품목명)
+4. receipt_date (구입일자/입고일)
+5. quantity (구입량/수량) - 그램 단위
+6. total_amount (구입액/금액)
+7. origin_country (원산지)
+8. unit_price (단가) - 계산 가능한 경우
+
+각 형식별 매핑 규칙을 자동으로 적용하여
+어떤 형식의 Excel 파일도 처리 가능하도록 구현 가능
+""")
+
+if __name__ == "__main__":
+    main()
--- a/app.py
+++ b/app.py
@ -14,6 +14,7 @@ import pandas as pd
 from werkzeug.utils import secure_filename
 import json
 from contextlib import contextmanager
+from excel_processor import ExcelProcessor

 # Flask 앱 초기화
 app = Flask(__name__, static_folder='static', template_folder='templates')
@ -230,7 +231,7 @@ def get_formula_ingredients(formula_id):

@app.route('/api/upload/purchase', methods=['POST'])
 def upload_purchase_excel():
-    """Excel 파일 업로드 및 입고 처리"""
+    """Excel 파일 업로드 및 입고 처리 (한의사랑/한의정보 형식 자동 감지)"""
    try:
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': '파일이 없습니다'}), 400
@ -249,25 +250,36 @@ def upload_purchase_excel():
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(filepath)

-        # Excel 파일 읽기
-        df = pd.read_excel(filepath)
+        # Excel 프로세서로 파일 처리
+        processor = ExcelProcessor()
+        if not processor.read_excel(filepath):
+            return jsonify({'success': False, 'error': 'Excel 파일을 읽을 수 없습니다'}), 400

-        # 컬럼 매핑 (Excel 컬럼명 -> DB 필드)
-        column_mapping = {
-            '제품코드': 'insurance_code',
-            '업체명': 'supplier_name',
-            '약재명': 'herb_name',
-            '구입일자': 'receipt_date',
-            '구입량': 'quantity',
-            '구입액': 'total_amount',
-            '원산지': 'origin_country'
-        }
+        # 형식 감지 및 처리
+        try:
+            df = processor.process()
+        except ValueError as e:
+            return jsonify({
+                'success': False,
+                'error': f'지원하지 않는 Excel 형식입니다: {str(e)}'
+            }), 400

-        df = df.rename(columns=column_mapping)
+        # 데이터 검증
+        valid, msg = processor.validate_data()
+        if not valid:
+            return jsonify({'success': False, 'error': f'데이터 검증 실패: {msg}'}), 400
+
+        # 표준 형식으로 변환
+        df = processor.export_to_standard()
+
+        # 처리 요약 정보
+        summary = processor.get_summary()

        # 데이터 처리
        with get_db() as conn:
            cursor = conn.cursor()
+            processed_rows = 0
+            processed_items = set()

            # 날짜별, 업체별로 그룹화
            grouped = df.groupby(['receipt_date', 'supplier_name'])
@ -343,10 +355,26 @@ def upload_purchase_excel():
                        VALUES ('RECEIPT', ?, ?, ?, ?, 'purchase_receipts', ?)
                    """, (herb_item_id, lot_id, quantity, unit_price, receipt_id))

+                    processed_rows += 1
+                    processed_items.add(row['herb_name'])
+
+        # 응답 메시지 생성
+        format_name = {
+            'hanisarang': '한의사랑',
+            'haninfo': '한의정보'
+        }.get(summary['format_type'], '알 수 없음')
+
        return jsonify({
            'success': True,
-            'message': f'입고 데이터가 성공적으로 처리되었습니다',
-            'filename': filename
+            'message': f'{format_name} 형식 입고 데이터가 성공적으로 처리되었습니다',
+            'filename': filename,
+            'summary': {
+                'format': format_name,
+                'processed_rows': processed_rows,
+                'total_items': len(processed_items),
+                'total_quantity': f"{summary['total_quantity']:,.0f}g",
+                'total_amount': f"{summary['total_amount']:,.0f}원"
+            }
        })

    except Exception as e:
--- a/excel_processor.py
+++ b/excel_processor.py
@ -0,0 +1,286 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Excel 파일 처리 모듈
+한의사랑, 한의정보 형식 자동 감지 및 처리
+"""
+
+import pandas as pd
+import numpy as np
+from datetime import datetime
+import re
+
+class ExcelProcessor:
+    """Detect the layout of a purchase-receipt workbook and normalise it.
+
+    Two vendor layouts are recognised ('hanisarang' and 'haninfo'). Each is
+    renamed onto a shared set of columns: insurance_code, supplier_name,
+    herb_name, receipt_date, quantity, total_amount, unit_price,
+    origin_country and notes.
+
+    Typical call order:
+        read_excel() -> process() -> validate_data()
+        -> export_to_standard() / get_summary()
+    """
+
+    # Hanisarang layout: source column -> standard column
+    HANISARANG_MAPPING = {
+        '품목명': 'herb_name',
+        '제품코드': 'insurance_code',
+        '일그램당단가': 'unit_price',
+        '원산지': 'origin_country',
+        '적용일': 'receipt_date',
+        '총구입량': 'quantity',
+        '총구입단가': 'total_amount'
+    }
+
+    # Haninfo layout: source column -> standard column
+    HANINFO_MAPPING = {
+        '제품코드': 'insurance_code',
+        '업체명': 'supplier_name',
+        '약재명': 'herb_name',
+        '구입일자': 'receipt_date',
+        '구입량': 'quantity',
+        '구입액': 'total_amount',
+        '원산지': 'origin_country',
+        '비고': 'notes'
+    }
+
+    def __init__(self):
+        """Start with no file loaded."""
+        self.format_type = None    # 'hanisarang', 'haninfo' or 'unknown' after read_excel()
+        self.df_original = None    # DataFrame exactly as read from the workbook
+        self.df_processed = None   # DataFrame renamed to the standard columns
+
+    def detect_format(self, df):
+        """Classify a DataFrame by its column names.
+
+        Args:
+            df: DataFrame as read from the workbook.
+
+        Returns:
+            'hanisarang', 'haninfo' or 'unknown'.
+        """
+        columns = df.columns.tolist()
+
+        # Hanisarang: all of its signature columns must be present
+        hanisarang_cols = ['품목명', '제품코드', '일그램당단가', '총구입량', '총구입단가']
+        if all(col in columns for col in hanisarang_cols):
+            return 'hanisarang'
+
+        # Haninfo: all of its signature columns must be present
+        haninfo_cols = ['제품코드', '업체명', '약재명', '구입일자', '구입량', '구입액']
+        if all(col in columns for col in haninfo_cols):
+            return 'haninfo'
+
+        # Fallback: a product code plus a herb name is treated as haninfo
+        if '제품코드' in columns and '약재명' in columns:
+            return 'haninfo'
+
+        return 'unknown'
+
+    def read_excel(self, file_path):
+        """Load a workbook and record its detected format.
+
+        Args:
+            file_path: Path handed to ``pd.read_excel``.
+
+        Returns:
+            True on success. False when reading or detection raises; the
+            error text is printed and no exception reaches the caller.
+        """
+        try:
+            self.df_original = pd.read_excel(file_path)
+            self.format_type = self.detect_format(self.df_original)
+            return True
+        except Exception as e:
+            # Boundary handler: callers branch on the boolean result.
+            print(f"Excel 파일 읽기 실패: {str(e)}")
+            return False
+
+    def _map_columns(self, mapping):
+        """Return a new DataFrame holding only the mapped columns, renamed."""
+        df = self.df_original.copy()
+        df_mapped = pd.DataFrame()
+        for old_col, new_col in mapping.items():
+            if old_col in df.columns:
+                df_mapped[new_col] = df[old_col]
+        return df_mapped
+
+    @staticmethod
+    def _safe_unit_price(total_amount, quantity):
+        """Return total_amount / quantity per row, NaN where it is undefined.
+
+        Inputs are coerced to numbers on temporaries only, so the caller's
+        columns keep their raw values for validate_data() to inspect. A zero
+        quantity yields NaN rather than inf.
+        """
+        amount = pd.to_numeric(total_amount, errors='coerce')
+        grams = pd.to_numeric(quantity, errors='coerce')
+        return amount / grams.where(grams != 0)
+
+    @staticmethod
+    def _as_yyyymmdd(dates):
+        """Render a date column as 'YYYYMMDD' strings, keeping missing values missing.
+
+        Datetime columns are formatted directly. Anything else is
+        stringified, with the trailing '.0' removed that appears when a
+        numeric date such as 20260215 was read as a float.
+        """
+        if pd.api.types.is_datetime64_any_dtype(dates):
+            return dates.dt.strftime('%Y%m%d')
+        text = dates.astype(str).str.strip().str.replace(r'\.0$', '', regex=True)
+        # astype(str) turns NaN into the literal 'nan'; restore real gaps.
+        return text.where(dates.notna())
+
+    def process_hanisarang(self):
+        """Convert a hanisarang workbook to the standard columns.
+
+        The supplier is fixed to '한의사랑', dates are parsed as
+        'YYYY-MM-DD' and rendered as 'YYYYMMDD' (unparseable dates become
+        missing), and any missing unit price is filled from
+        total_amount / quantity.
+
+        Returns:
+            The processed DataFrame (also stored in ``self.df_processed``).
+        """
+        df_mapped = self._map_columns(self.HANISARANG_MAPPING)
+
+        # This layout carries no supplier column; use the vendor name.
+        df_mapped['supplier_name'] = '한의사랑'
+
+        # Date handling
+        if 'receipt_date' in df_mapped.columns:
+            df_mapped['receipt_date'] = pd.to_datetime(
+                df_mapped['receipt_date'],
+                format='%Y-%m-%d',
+                errors='coerce'
+            ).dt.strftime('%Y%m%d')
+
+        # Unit price normally ships with the file; fill only the gaps.
+        if 'total_amount' in df_mapped.columns and 'quantity' in df_mapped.columns:
+            computed = self._safe_unit_price(
+                df_mapped['total_amount'], df_mapped['quantity']
+            )
+            if 'unit_price' in df_mapped.columns:
+                existing = df_mapped['unit_price']
+                df_mapped['unit_price'] = existing.where(existing.notna(), computed)
+            else:
+                df_mapped['unit_price'] = computed
+
+        self.df_processed = df_mapped
+        return df_mapped
+
+    def process_haninfo(self):
+        """Convert a haninfo workbook to the standard columns.
+
+        Dates are normalised to 'YYYYMMDD' strings and unit_price is
+        computed as total_amount / quantity, rounded to two decimals.
+
+        Returns:
+            The processed DataFrame (also stored in ``self.df_processed``).
+        """
+        df_mapped = self._map_columns(self.HANINFO_MAPPING)
+
+        # Date handling (YYYYMMDD)
+        if 'receipt_date' in df_mapped.columns:
+            df_mapped['receipt_date'] = self._as_yyyymmdd(df_mapped['receipt_date'])
+
+        # Unit price
+        if 'total_amount' in df_mapped.columns and 'quantity' in df_mapped.columns:
+            df_mapped['unit_price'] = self._safe_unit_price(
+                df_mapped['total_amount'], df_mapped['quantity']
+            ).round(2)
+
+        self.df_processed = df_mapped
+        return df_mapped
+
+    def process(self):
+        """Run the processor that matches the detected format.
+
+        Returns:
+            The processed DataFrame.
+
+        Raises:
+            ValueError: If the detected format is neither 'hanisarang' nor
+                'haninfo'.
+        """
+        if self.format_type == 'hanisarang':
+            return self.process_hanisarang()
+        elif self.format_type == 'haninfo':
+            return self.process_haninfo()
+        else:
+            raise ValueError(f"지원하지 않는 형식: {self.format_type}")
+
+    def validate_data(self):
+        """Check the processed data and coerce its numeric columns.
+
+        Side effect: quantity, total_amount and unit_price in
+        ``self.df_processed`` are replaced by their numeric form.
+
+        Returns:
+            ``(True, message)`` when every check passes, otherwise
+            ``(False, reason)`` for the first failed check. The checks cover
+            required columns, numeric columns, NULLs in required columns,
+            missing receipt dates and negative quantities or amounts.
+        """
+        if self.df_processed is None:
+            return False, "처리된 데이터가 없습니다"
+
+        df = self.df_processed
+
+        # Required columns
+        required_columns = ['herb_name', 'quantity', 'total_amount']
+        missing_cols = [col for col in required_columns if col not in df.columns]
+
+        if missing_cols:
+            return False, f"필수 컬럼 누락: {', '.join(missing_cols)}"
+
+        # Numeric columns: a value that is present but does not parse as a
+        # number is an error. With errors='coerce' alone it would silently
+        # turn into NaN.
+        numeric_cols = ['quantity', 'total_amount', 'unit_price']
+        for col in numeric_cols:
+            if col not in df.columns:
+                continue
+            try:
+                converted = pd.to_numeric(df[col], errors='coerce')
+            except (TypeError, ValueError):
+                return False, f"{col} 컬럼이 숫자 형식이 아닙니다"
+            if (converted.isna() & df[col].notna()).any():
+                return False, f"{col} 컬럼이 숫자 형식이 아닙니다"
+            df[col] = converted
+
+        # NULLs in required columns
+        null_check = df[required_columns].isnull().sum()
+        if null_check.sum() > 0:
+            null_cols = null_check[null_check > 0].index.tolist()
+            return False, f"NULL 값 포함 컬럼: {', '.join(null_cols)}"
+
+        # Missing or unparseable dates: a consumer that groups rows by
+        # receipt_date would drop them without any error.
+        if 'receipt_date' in df.columns and df['receipt_date'].isnull().any():
+            return False, "receipt_date 컬럼에 누락되었거나 해석할 수 없는 날짜가 있습니다"
+
+        # Negative values
+        for col in ['quantity', 'total_amount']:
+            if col in df.columns:
+                if (df[col] < 0).any():
+                    return False, f"{col} 컬럼에 음수 값이 있습니다"
+
+        return True, "검증 통과"
+
+    def get_summary(self):
+        """Summarise the processed data.
+
+        Returns:
+            ``None`` when nothing has been processed. Otherwise a dict with
+            format_type, total_rows, total_items (distinct herb names),
+            total_quantity, total_amount, suppliers and date_range. The
+            date_range is ``{'start', 'end'}`` in 'YYYY-MM-DD', or ``None``
+            when no receipt date parses as 'YYYYMMDD'.
+        """
+        if self.df_processed is None:
+            return None
+
+        df = self.df_processed
+
+        summary = {
+            'format_type': self.format_type,
+            'total_rows': len(df),
+            'total_items': df['herb_name'].nunique() if 'herb_name' in df.columns else 0,
+            'total_quantity': df['quantity'].sum() if 'quantity' in df.columns else 0,
+            'total_amount': df['total_amount'].sum() if 'total_amount' in df.columns else 0,
+            'suppliers': df['supplier_name'].unique().tolist() if 'supplier_name' in df.columns else [],
+            'date_range': None
+        }
+
+        # Date range
+        if 'receipt_date' in df.columns:
+            dates = pd.to_datetime(df['receipt_date'], format='%Y%m%d', errors='coerce')
+            dates = dates.dropna()
+            if not dates.empty:
+                summary['date_range'] = {
+                    'start': dates.min().strftime('%Y-%m-%d'),
+                    'end': dates.max().strftime('%Y-%m-%d')
+                }
+
+        return summary
+
+    def export_to_standard(self):
+        """Return the processed data with exactly the standard columns, in order.
+
+        Columns absent from the processed data are added with a default:
+        '' for notes, '미지정' for supplier_name and None for the rest.
+
+        Returns:
+            A new DataFrame, or ``None`` when nothing has been processed.
+        """
+        if self.df_processed is None:
+            return None
+
+        # Standard column order
+        standard_columns = [
+            'insurance_code', 'supplier_name', 'herb_name',
+            'receipt_date', 'quantity', 'total_amount',
+            'unit_price', 'origin_country', 'notes'
+        ]
+
+        # Keep only the standard columns that exist
+        available_cols = [col for col in standard_columns if col in self.df_processed.columns]
+        df_standard = self.df_processed[available_cols].copy()
+
+        # Add the missing ones with their defaults
+        for col in standard_columns:
+            if col not in df_standard.columns:
+                if col == 'notes':
+                    df_standard[col] = ''
+                elif col == 'supplier_name':
+                    df_standard[col] = '미지정'
+                else:
+                    df_standard[col] = None
+
+        return df_standard[standard_columns]
+
+
+# 테스트 함수
+def _report_sample(title, file_path, show_suppliers=False, blank_line_before=False):
+    """Run one sample workbook through ExcelProcessor and print the outcome.
+
+    Nothing beyond the banner is printed when the workbook cannot be read.
+    """
+    rule = "=" * 60
+    print(("\n" if blank_line_before else "") + rule)
+    print(title)
+    print(rule)
+
+    processor = ExcelProcessor()
+    if not processor.read_excel(file_path):
+        return
+
+    print(f"형식 감지: {processor.format_type}")
+    df = processor.process()
+    print(f"처리된 행 수: {len(df)}")
+
+    _, msg = processor.validate_data()
+    print(f"검증 결과: {msg}")
+
+    summary = processor.get_summary()
+    print(f"요약:")
+    print(f"  - 총 약재: {summary['total_items']}종")
+    print(f"  - 총 수량: {summary['total_quantity']:,.0f}g")
+    print(f"  - 총 금액: {summary['total_amount']:,.0f}원")
+    if show_suppliers:
+        print(f"  - 공급업체: {', '.join(map(str, summary['suppliers']))}")
+
+    # Sample of the processed rows
+    print("\n처리된 데이터 샘플:")
+    print(df.head(3).to_string())
+
+
+def test_processor():
+    """Smoke-test the processor against the two bundled sample workbooks.
+
+    The samples are looked up in the ``sample`` directory next to this
+    module, so the check runs wherever the repository is checked out.
+    """
+    import os  # local import: this module does not import os at file level
+
+    sample_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'sample')
+
+    # Hanisarang sample
+    _report_sample(
+        "한의사랑 형식 테스트",
+        os.path.join(sample_dir, '한의사랑.xlsx'),
+    )
+
+    # Haninfo sample (this layout carries supplier names, so list them)
+    _report_sample(
+        "한의정보 형식 테스트",
+        os.path.join(sample_dir, '한의정보.xlsx'),
+        show_suppliers=True,
+        blank_line_before=True,
+    )
+
+
+if __name__ == "__main__":
+    test_processor()
--- a/sample/한의사랑.xlsx
+++ b/sample/한의사랑.xlsx
--- a/sample/한의정보.xlsx
+++ b/sample/한의정보.xlsx