#!/usr/bin/env python3 """ Drug Safety Intelligence Report Datasets used: fda.faers fda.orangebook clinicaltrials.studies clinvar.variants pubmed.baseline Cross-dataset value: FAERS tells you what adverse events occur in the real world post-approval. Orange Book tells you what the FDA approved and which generic alternatives exist. ClinicalTrials tells you what is being studied now. ClinVar tells you which patients carry genetic variants linked to the condition. PubMed tells you what the recent literature says. Together they answer: "Is this drug safe for my patient population, what genetic risk factors exist, and what is the current research trajectory?" Usage: export MICROQUERY_TOKEN=your_token python3 drug_safety_report.py --drug metformin --condition diabetes python3 drug_safety_report.py --drug warfarin --condition "atrial fibrillation" """ import argparse import os import sys from concurrent.futures import ThreadPoolExecutor from client import MicroqueryClient, QueryError def header(title: str): print(f"\n{'='*60}") print(f" {title}") print(f"{'='*60}") def section(title: str): print(f"\n--- {title} ---") def run(drug: str, condition: str, mq: MicroqueryClient): drug_lower = drug.lower() cond_lower = condition.lower() header(f"DRUG SAFETY REPORT: {drug.upper()}") with ThreadPoolExecutor(max_workers=5) as ex: # ------------------------------------------------------------------ # # Fire all 5 independent queries in parallel. # # PubMed (59.8 GB) and ClinicalTrials are the longest; firing them # # in the background while the smaller FAERS/OrangeBook run first. # # ------------------------------------------------------------------ # f_pubmed = ex.submit(mq.query, "pubmed", f""" SELECT pub_year, COUNT(*) AS publications FROM baseline WHERE MedlineCitation.Article.ArticleTitle ~ '(?i){drug_lower}' AND pub_year >= 2020 GROUP BY pub_year ORDER BY pub_year DESC LIMIT 10 """, verbose=True) f_trials = ex.submit(mq.query, "clinicaltrials", f""" SELECT nct_id, brief_title, overall_status, phase, enrollment, start_date FROM studies WHERE (brief_title ~ '(?i){drug_lower}' OR conditions ~ '(?i){cond_lower}') AND overall_status IN ('RECRUITING', 'ACTIVE_NOT_RECRUITING', 'ENROLLING_BY_INVITATION') ORDER BY start_date DESC LIMIT 10 """, verbose=True) f_clinvar = ex.submit(mq.query, "clinvar", f""" SELECT gene_symbol, COUNT(*) AS pathogenic_variants, SUM(CASE WHEN n_submitters >= 3 THEN 1 ELSE 0 END) AS well_reviewed FROM variants, UNNEST(phenotypes) AS ph WHERE assembly = 'GRCh38' AND sig_simple = 1 AND ph ~ '(?i){cond_lower}' GROUP BY gene_symbol ORDER BY well_reviewed DESC, pathogenic_variants DESC LIMIT 15 """, verbose=True) # FAERS and OrangeBook are fast (~6 GB combined); run sequentially # in the foreground while the large queries are in flight. # reactions is an array; group by primary reaction (reactions[0]) # outcomes codes: DE=death, LT=life-threatening, HO=hospitalization, DS=disability faers_rows = mq.query("fda", f""" SELECT reactions[0] AS reaction, COUNT(*) AS reports, SUM(CASE WHEN outcomes[0]='DE' OR outcomes[1]='DE' OR outcomes[2]='DE' THEN 1 ELSE 0 END) AS deaths, SUM(CASE WHEN outcomes[0]='HO' OR outcomes[1]='HO' OR outcomes[2]='HO' THEN 1 ELSE 0 END) AS hospitalizations FROM faers WHERE drugname ~ '(?i){drug_lower}' GROUP BY reactions[0] ORDER BY reports DESC LIMIT 15 """, verbose=True) # appl_type: 'N'=NDA (brand), 'A'=ANDA (generic) # active_ingredients is an array of {name, strength} objects ob_rows = mq.query("fda", f""" SELECT application_number, appl_type, brand_name, active_ingredients[0].name AS ingredient, active_ingredients[0].strength AS strength, dosage_form, route, te_code, marketing_status FROM orangebook WHERE active_ingredients[0].name ~ '(?i){drug_lower}' OR brand_name ~ '(?i){drug_lower}' ORDER BY appl_type, application_number LIMIT 20 """, verbose=True) # Collect background results (likely already done by now) trial_rows = f_trials.result() clinvar_rows = f_clinvar.result() pubmed_rows = f_pubmed.result() # ------------------------------------------------------------------ # # Print results in logical order # # ------------------------------------------------------------------ # # ------------------------------------------------------------------ # # 1. FAERS — real-world adverse event profile # # ------------------------------------------------------------------ # section("1. Real-World Adverse Events (FDA FAERS)") if faers_rows: total_reports = sum(r["reports"] for r in faers_rows) print(f" Top adverse reactions across {total_reports:,} reports (by primary event):") for r in faers_rows[:10]: hosp_pct = r["hospitalizations"] / r["reports"] * 100 if r["reports"] else 0 print( f" {str(r['reaction']):<40} {r['reports']:>6} reports " f"{hosp_pct:>5.1f}% hosp" + (f" ⚠ {r['deaths']} deaths" if r["deaths"] else "") ) else: print(f" No FAERS reports found for '{drug}'") # ------------------------------------------------------------------ # # 2. Orange Book — approval status and generic availability # # ------------------------------------------------------------------ # section("2. FDA Approval Status (Orange Book)") if ob_rows: nda = [r for r in ob_rows if r.get("appl_type") == "N"] anda = [r for r in ob_rows if r.get("appl_type") == "A"] print(f" Brand applications (NDA): {len(nda)}") print(f" Generic applications (ANDA): {len(anda)}") for r in (nda or ob_rows)[:3]: appl_label = "NDA" if r.get("appl_type") == "N" else "ANDA" print( f" {appl_label} {r['application_number']} " f"{r.get('ingredient','')} {r.get('dosage_form','')} {r.get('route','')} " f"{r.get('strength','')} {r.get('marketing_status','')} TE:{r.get('te_code','')}" ) else: print(f" No Orange Book entries found for '{drug}'") # ------------------------------------------------------------------ # # 3. ClinicalTrials — active research # # ------------------------------------------------------------------ # section("3. Active Clinical Trials") if trial_rows: print(f" {len(trial_rows)} actively recruiting or ongoing trials:") for r in trial_rows: phase = r.get("phase") or "N/A" n = r.get("enrollment") or "?" print(f" {r['nct_id']} Phase {phase:<3} n={n:<6} {r['brief_title'][:60]}") else: print(f" No active trials found for '{drug}' / '{condition}'") # ------------------------------------------------------------------ # # 4. ClinVar — genetic variants linked to the condition # # ------------------------------------------------------------------ # section(f"4. Genetic Risk Variants (ClinVar — condition: {condition})") if clinvar_rows: print(f" Genes with pathogenic variants linked to '{condition}':") for r in clinvar_rows[:10]: print( f" {r['gene_symbol']:<12} {r['pathogenic_variants']:>4} pathogenic " f"({r['well_reviewed']} well-reviewed)" ) else: print(f" No ClinVar variants found for '{condition}'") # ------------------------------------------------------------------ # # 5. PubMed — recent literature pulse # # ------------------------------------------------------------------ # section("5. Recent Literature (PubMed)") if pubmed_rows: total_pubs = sum(r["publications"] for r in pubmed_rows) print(f" {total_pubs:,} PubMed publications mentioning '{drug}' since 2020:") for r in pubmed_rows: bar = "█" * min(r["publications"] // 50, 40) print(f" {r['pub_year']} {r['publications']:>5} {bar}") else: print(f" No PubMed entries found for '{drug}'") # ------------------------------------------------------------------ # # Summary # # ------------------------------------------------------------------ # header("SUMMARY") print(mq.cost_summary()) def main(): parser = argparse.ArgumentParser(description="Drug safety intelligence report") parser.add_argument("--drug", required=True, help="Drug name (e.g. metformin)") parser.add_argument("--condition", required=True, help="Condition (e.g. diabetes)") parser.add_argument("--token", default=os.environ.get("MICROQUERY_TOKEN"), help="Microquery API token (or set MICROQUERY_TOKEN)") args = parser.parse_args() if not args.token: print("Error: set MICROQUERY_TOKEN or pass --token", file=sys.stderr) sys.exit(1) mq = MicroqueryClient(api_key=args.token) try: run(args.drug, args.condition, mq) except QueryError as e: print(f"Query error: {e}", file=sys.stderr) sys.exit(1) if __name__ == "__main__": main()