""" SQLite-Graph를 사용한 PubMed GraphRAG 구현 기존 SQL JOIN → Cypher 쿼리로 변환 훨씬 간단하고 직관적인 그래프 탐색 """ import sys import os # UTF-8 인코딩 강제 if sys.platform == 'win32': import io sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8') import sqlite3 try: import sqlite_graph except ImportError: print("[WARNING] sqlite-graph 미설치. 설치: pip install sqlite-graph") print("[INFO] 기존 SQL로 데모 실행합니다.") sqlite_graph = None # ============================================================ # SQLite-Graph 초기화 # ============================================================ def init_graph_db(): """SQLite-Graph 데이터베이스 초기화""" db_path = os.path.join(os.path.dirname(__file__), 'db', 'knowledge_graph.db') conn = sqlite3.connect(db_path) if sqlite_graph: # SQLite-Graph 확장 로드 sqlite_graph.load(conn) cursor = conn.cursor() try: # 그래프 테이블 생성 (SQLite-Graph 사용 시) if sqlite_graph: cursor.execute(""" CREATE VIRTUAL TABLE IF NOT EXISTS graph USING graph_table() """) print("[OK] 그래프 DB 초기화 완료") return conn except Exception as e: print(f"[ERROR] 초기화 실패: {e}") return conn # ============================================================ # 데이터 삽입: Cypher vs SQL 비교 # ============================================================ def insert_data_with_cypher(conn): """Cypher로 노드와 관계 생성""" if not sqlite_graph: print("[SKIP] sqlite-graph 미설치") return cursor = conn.cursor() print("\n" + "=" * 80) print("Cypher로 지식 그래프 구축") print("=" * 80) # 1. 노드 생성 cypher_create_nodes = """ CREATE (statin:Drug {name: 'Statin', type: 'HMG-CoA inhibitor'}), (coq10:Drug {name: 'CoQ10', type: 'Supplement'}), (myopathy:Condition {name: 'Myopathy', description: '근육병증'}), (htn:PatientProfile {name: 'Patient_with_HTN', description: '고혈압 환자'}), (naproxen:Drug {name: 'Naproxen', type: 'NSAID'}), (ibuprofen:Drug {name: 'Ibuprofen', type: 'NSAID'}), (pmid1:Evidence {pmid: '30371340', title: 'CoQ10 for Statin Myopathy', reliability: 0.95}), (pmid2:Evidence {pmid: '27959716', title: 'CV Safety of NSAIDs', reliability: 0.99}) """ try: cursor.execute(f"SELECT graph_cypher('{cypher_create_nodes}')") print("✅ 노드 생성 완료") except Exception as e: print(f"⚠️ Cypher 노드 생성 실패 (확장 버전 확인 필요): {e}") # 2. 관계 생성 cypher_create_relationships = """ MATCH (statin:Drug {name: 'Statin'}), (coq10:Drug {name: 'CoQ10'}), (myopathy:Condition {name: 'Myopathy'}), (pmid1:Evidence {pmid: '30371340'}) CREATE (statin)-[:INHIBITS {mechanism: 'HMG-CoA pathway'}]->(coq10), (coq10)-[:REDUCES {effect_size: -1.60, p_value: 0.001}]->(myopathy), (pmid1)-[:SUPPORTS]->(coq10)-[:REDUCES]->(myopathy) """ try: cursor.execute(f"SELECT graph_cypher('{cypher_create_relationships}')") print("✅ 관계 생성 완료") except Exception as e: print(f"⚠️ Cypher 관계 생성 실패: {e}") conn.commit() def insert_data_with_sql(conn): """기존 SQL 방식으로 데이터 삽입 (비교용)""" cursor = conn.cursor() print("\n" + "=" * 80) print("SQL로 지식 그래프 구축 (기존 방식)") print("=" * 80) try: # Entities 테이블 생성 cursor.execute(""" CREATE TABLE IF NOT EXISTS entities ( id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT UNIQUE NOT NULL, type TEXT NOT NULL, properties TEXT ) """) # Relationships 테이블 생성 cursor.execute(""" CREATE TABLE IF NOT EXISTS relationships ( id INTEGER PRIMARY KEY AUTOINCREMENT, subject_id INTEGER, predicate TEXT, object_id INTEGER, properties TEXT, FOREIGN KEY (subject_id) REFERENCES entities(id), FOREIGN KEY (object_id) REFERENCES entities(id) ) """) # 샘플 데이터 삽입 entities = [ ('Statin', 'Drug', '{"description": "HMG-CoA inhibitor"}'), ('CoQ10', 'Drug', '{"description": "Supplement"}'), ('Myopathy', 'Condition', '{"description": "근육병증"}'), ('Naproxen', 'Drug', '{"type": "NSAID"}'), ('Ibuprofen', 'Drug', '{"type": "NSAID"}'), ] for name, entity_type, props in entities: cursor.execute(""" INSERT OR IGNORE INTO entities (name, type, properties) VALUES (?, ?, ?) """, (name, entity_type, props)) # 관계 삽입 cursor.execute(""" INSERT OR IGNORE INTO relationships (subject_id, predicate, object_id, properties) SELECT (SELECT id FROM entities WHERE name='Statin'), 'INHIBITS', (SELECT id FROM entities WHERE name='CoQ10'), '{"mechanism": "HMG-CoA pathway"}' """) cursor.execute(""" INSERT OR IGNORE INTO relationships (subject_id, predicate, object_id, properties) SELECT (SELECT id FROM entities WHERE name='CoQ10'), 'REDUCES', (SELECT id FROM entities WHERE name='Myopathy'), '{"effect_size": -1.60, "p_value": 0.001}' """) conn.commit() print("✅ SQL 데이터 삽입 완료") except Exception as e: print(f"[ERROR] SQL 삽입 실패: {e}") # ============================================================ # 쿼리 비교: Cypher vs SQL # ============================================================ def query_with_cypher(conn): """Cypher로 그래프 쿼리""" if not sqlite_graph: print("[SKIP] sqlite-graph 미설치") return cursor = conn.cursor() print("\n" + "=" * 80) print("Cypher 쿼리 예시") print("=" * 80) # 예시 1: 2-hop 경로 탐색 print("\n[쿼리 1] Statin → ? → Myopathy 경로 찾기") cypher_query_1 = """ MATCH (statin:Drug {name: 'Statin'})-[r1]->(middle)-[r2]->(myopathy:Condition {name: 'Myopathy'}) RETURN statin.name, type(r1), middle.name, type(r2), myopathy.name """ try: cursor.execute(f"SELECT graph_cypher('{cypher_query_1}')") results = cursor.fetchall() for row in results: print(f" {row}") except Exception as e: print(f" ⚠️ 쿼리 실패: {e}") # 예시 2: 특정 약물의 모든 관계 print("\n[쿼리 2] Naproxen의 모든 관계 찾기") cypher_query_2 = """ MATCH (naproxen:Drug {name: 'Naproxen'})-[r]->(target) RETURN naproxen.name, type(r), target.name """ try: cursor.execute(f"SELECT graph_cypher('{cypher_query_2}')") results = cursor.fetchall() for row in results: print(f" {row}") except Exception as e: print(f" ⚠️ 쿼리 실패: {e}") # 예시 3: 근거가 있는 관계만 필터링 print("\n[쿼리 3] 근거(Evidence)가 있는 약물-증상 관계") cypher_query_3 = """ MATCH (drug:Drug)-[treats:REDUCES]->(condition:Condition)<-[:SUPPORTS]-(evidence:Evidence) WHERE evidence.reliability > 0.9 RETURN drug.name, condition.name, evidence.pmid, evidence.reliability """ try: cursor.execute(f"SELECT graph_cypher('{cypher_query_3}')") results = cursor.fetchall() for row in results: print(f" {row}") except Exception as e: print(f" ⚠️ 쿼리 실패: {e}") def query_with_sql(conn): """기존 SQL로 동일한 쿼리 수행 (비교용)""" cursor = conn.cursor() print("\n" + "=" * 80) print("SQL 쿼리 예시 (기존 방식)") print("=" * 80) # 예시 1: 2-hop 경로 (복잡한 JOIN) print("\n[쿼리 1] Statin → ? → Myopathy 경로 찾기 (SQL)") sql_query_1 = """ SELECT e1.name AS start, r1.predicate AS rel1, e2.name AS middle, r2.predicate AS rel2, e3.name AS end FROM relationships r1 JOIN entities e1 ON r1.subject_id = e1.id JOIN entities e2 ON r1.object_id = e2.id JOIN relationships r2 ON r2.subject_id = e2.id JOIN entities e3 ON r2.object_id = e3.id WHERE e1.name = 'Statin' AND e3.name = 'Myopathy' """ try: cursor.execute(sql_query_1) results = cursor.fetchall() for row in results: print(f" {row}") if not results: print(" (결과 없음)") except Exception as e: print(f" ⚠️ 쿼리 실패: {e}") # ============================================================ # 그래프 알고리즘 예시 (SQLite-Graph 기능) # ============================================================ def graph_algorithms(conn): """SQLite-Graph의 내장 그래프 알고리즘 사용""" if not sqlite_graph: print("[SKIP] sqlite-graph 미설치") return cursor = conn.cursor() print("\n" + "=" * 80) print("그래프 알고리즘") print("=" * 80) try: # 노드 개수 cursor.execute("SELECT graph_count_nodes()") node_count = cursor.fetchone()[0] print(f"총 노드 수: {node_count}") # 엣지 개수 cursor.execute("SELECT graph_count_edges()") edge_count = cursor.fetchone()[0] print(f"총 엣지 수: {edge_count}") # 그래프 밀도 cursor.execute("SELECT graph_density()") density = cursor.fetchone()[0] print(f"그래프 밀도: {density:.4f}") # Degree Centrality (중심성) print("\n노드별 중심성:") cursor.execute(""" SELECT node_name, graph_degree_centrality(node_name) FROM (SELECT DISTINCT name AS node_name FROM entities) ORDER BY graph_degree_centrality(node_name) DESC LIMIT 5 """) for row in cursor.fetchall(): print(f" {row[0]}: {row[1]:.4f}") except Exception as e: print(f"⚠️ 알고리즘 실행 실패: {e}") # ============================================================ # 실제 추천 시스템 예시 # ============================================================ def recommend_with_graph(conn, patient_conditions, symptom): """ SQLite-Graph + Cypher로 약물 추천 장점: - 추론 경로 탐색이 매우 간단 - 다단계 관계 쿼리가 직관적 """ if not sqlite_graph: print("\n[INFO] SQLite-Graph 미설치 시 기존 SQL 사용") return recommend_with_sql(conn, patient_conditions, symptom) cursor = conn.cursor() print("\n" + "=" * 80) print(f"약물 추천: 환자({patient_conditions}) → 증상({symptom})") print("=" * 80) # Cypher로 추천 약물 찾기 cypher_recommend = f""" MATCH (drug:Drug)-[treats:TREATS|REDUCES]->(condition:Condition {{name: '{symptom}'}}) WHERE NOT (drug)-[:CONTRAINDICATED_IN]->(:PatientProfile {{name: 'Patient_with_{patient_conditions[0]}'}}) RETURN drug.name, treats.effect_size, treats.p_value ORDER BY treats.effect_size DESC """ try: cursor.execute(f"SELECT graph_cypher('{cypher_recommend}')") results = cursor.fetchall() if results: print(f"\n✅ 추천 약물:") for row in results: print(f" - {row[0]} (효과: {row[1]}, P-value: {row[2]})") else: print(" (추천 결과 없음)") except Exception as e: print(f"⚠️ 추천 실패: {e}") def recommend_with_sql(conn, patient_conditions, symptom): """기존 SQL 방식 추천 (비교용)""" cursor = conn.cursor() print("\n[SQL 방식 추천]") sql_recommend = """ SELECT e1.name AS drug, r.properties FROM relationships r JOIN entities e1 ON r.subject_id = e1.id JOIN entities e2 ON r.object_id = e2.id WHERE r.predicate IN ('TREATS', 'REDUCES') AND e2.name = ? AND e1.id NOT IN ( SELECT subject_id FROM relationships WHERE predicate = 'CONTRAINDICATED_IN' ) """ try: cursor.execute(sql_recommend, (symptom,)) results = cursor.fetchall() if results: print(f"\n✅ 추천 약물:") for row in results: print(f" - {row[0]}") else: print(" (추천 결과 없음)") except Exception as e: print(f"⚠️ 추천 실패: {e}") # ============================================================ # 비교 요약 # ============================================================ def print_comparison(): """Cypher vs SQL 비교""" print("\n\n" + "=" * 80) print("SQLite-Graph (Cypher) vs 기존 SQL 비교") print("=" * 80) comparison = """ ┌─────────────────────┬──────────────────────────┬──────────────────────────┐ │ 항목 │ SQLite-Graph (Cypher) │ 기존 SQL │ ├─────────────────────┼──────────────────────────┼──────────────────────────┤ │ 그래프 탐색 │ ⭐⭐⭐⭐⭐ (직관적) │ ⭐⭐ (복잡한 JOIN) │ │ 2-hop 쿼리 │ MATCH (a)-[]->(b)-[]->(c) │ 3-way JOIN 필요 │ │ N-hop 경로 찾기 │ 매우 쉬움 │ 재귀 CTE 필요 │ │ 추론 경로 생성 │ 자동 (RETURN path) │ 수동 구현 필요 │ │ 성능 (작은 그래프) │ 비슷 │ 비슷 │ │ 성능 (큰 그래프) │ 더 빠름 (최적화됨) │ JOIN 오버헤드 │ │ 배포 │ 확장 설치 필요 │ SQLite만 있으면 됨 │ │ 학습 곡선 │ Cypher 학습 필요 │ SQL 익숙함 │ │ GraphRAG 적합성 │ ⭐⭐⭐⭐⭐ │ ⭐⭐⭐ │ └─────────────────────┴──────────────────────────┴──────────────────────────┘ 【결론】 ✅ SQLite-Graph 사용 권장: - GraphRAG 추론 경로 생성이 매우 간단 - Cypher의 표현력이 뛰어남 - 그래프 알고리즘 내장 (중심성, 밀도 등) ❌ 기존 SQL 유지가 나은 경우: - 배포 환경에서 확장 설치 불가 - 팀원들이 Cypher에 익숙하지 않음 - 그래프가 매우 단순함 """ print(comparison) # ============================================================ # MAIN # ============================================================ def main(): """메인 실행""" print("\n" + "=" * 80) print("SQLite-Graph 데모: PubMed GraphRAG") print("=" * 80) # 1. DB 초기화 conn = init_graph_db() # 2. 데이터 삽입 (Cypher vs SQL 비교) if sqlite_graph: insert_data_with_cypher(conn) insert_data_with_sql(conn) # 3. 쿼리 비교 if sqlite_graph: query_with_cypher(conn) query_with_sql(conn) # 4. 그래프 알고리즘 if sqlite_graph: graph_algorithms(conn) # 5. 추천 시스템 예시 patient_conditions = ['HTN'] symptom = 'Myopathy' recommend_with_graph(conn, patient_conditions, symptom) # 6. 비교 요약 print_comparison() conn.close() print("\n" + "=" * 80) print("데모 완료") print("=" * 80) if not sqlite_graph: print("\n[TIP] SQLite-Graph 설치: pip install sqlite-graph") print(" GitHub: https://github.com/agentflare-ai/sqlite-graph") if __name__ == '__main__': main()