# 两种使用 Python 获取基因信息的方法
# 使用 Entrez API
此方法需要在 NCBI 使用邮箱注册账号
from Bio import Entrez, Medline | |
def fetch_gene_annotatio(gene_name: str):
    """Fetch a gene's description and summary from the NCBI Gene database.

    Runs an Entrez ``esearch`` against ``db="gene"`` for ``gene_name``, takes
    the first returned ID, and reads that ID's ``esummary`` record.

    Args:
        gene_name: Search term passed to Entrez unchanged (e.g. ``"TP53"``).

    Returns:
        A dict with the keys ``"description"`` and ``"comments"``. Each value
        falls back to a placeholder string when the search returns no ID, the
        summary set is empty, or the record lacks the corresponding field.
    """
    no_description = 'No description available.'
    no_comments = 'No comments available.'

    # Placeholder address; replace it with a real one before running.
    Entrez.email = "xxxxxxx@xx.com"

    # Step 1: resolve the search term to at most one Gene ID.
    handle = Entrez.esearch(db="gene", term=gene_name, retmax=1)
    try:
        record = Entrez.read(handle)
    finally:
        # Release the connection even if parsing fails.
        handle.close()

    id_list = record["IdList"]
    if not id_list:
        # No match: return the placeholders instead of raising IndexError.
        return {"description": no_description, "comments": no_comments}
    gene_id = id_list[0]

    # Step 2: fetch the summary document for that ID.
    handle = Entrez.esummary(db="gene", id=gene_id)
    try:
        record = Entrez.read(handle)
    finally:
        handle.close()

    summaries = record["DocumentSummarySet"]["DocumentSummary"]
    if not summaries:
        return {"description": no_description, "comments": no_comments}
    gene_summary = summaries[0]

    description = gene_summary.get('Description', no_description)
    comments = gene_summary.get('Summary', no_comments)
    return {"description": description, "comments": comments}
fetch_gene_annotatio("TP53") |
结果如下
{'description': 'tumor protein p53', | |
'comments': 'This gene encodes a tumor suppressor protein containing transcriptional activation, DNA binding, and oligomerization domains. The encoded protein responds to diverse cellular stresses to regulate expression of target genes, thereby inducing cell cycle arrest, apoptosis, senescence, DNA repair, or changes in metabolism. Mutations in this gene are associated with a variety of human cancers, including hereditary cancers such as Li-Fraumeni syndrome. Alternative splicing of this gene and the use of alternate promoters result in multiple transcript variants and isoforms. Additional isoforms have also been shown to result from the use of alternate translation initiation codons from identical transcript variants (PMIDs: 12032546, 20937277). [provided by RefSeq, Dec 2016]'} |
# 使用 UniProt API
import requests | |
# UniProt API 查询基因注释 | |
def fetch_gene_annotation(gene_name):
    """Fetch a gene annotation from the UniProt REST search endpoint.

    Queries UniProtKB for ``gene:<gene_name> AND organism_id:9606`` in JSON
    format and reads the first returned entry.

    :param gene_name: gene name to search for
    :return: dict with the keys ``"description"`` (the recommended full
        protein name) and ``"comments"`` (the first text of the entry's first
        comment). Placeholder strings are returned when there is no hit or a
        field is missing, and ``"Error fetching data"`` for both keys when
        the request or JSON decoding fails.
    """
    UNIPROT_API_URL = "https://rest.uniprot.org/uniprotkb/search"
    # Let requests build the query string so the gene name is URL-encoded.
    params = {
        "query": f"gene:{gene_name} AND organism_id:9606",
        "format": "json",
    }

    # Only the network call and JSON decoding are expected to fail here.
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(UNIPROT_API_URL, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error fetching gene annotation for {gene_name}: {e}")
        return {"description": "Error fetching data", "comments": "Error fetching data"}

    # A search with no hits yields an empty "results" list; treat that (and a
    # missing key) as "nothing found" rather than indexing into it.
    results = data.get('results') if isinstance(data, dict) else None
    if not results or not results[0]:
        return {"description": "No description available", "comments": "No comments available"}
    result = results[0]

    description = (
        result.get('proteinDescription', {})
        .get('recommendedName', {})
        .get('fullName', {})
        .get('value', 'No description available.')
    )

    # "comments" and "texts" may be absent or empty; substitute a one-element
    # list of an empty dict so the lookups below fall through to the default.
    comment_list = result.get('comments') or [{}]
    text_list = comment_list[0].get('texts') or [{}]
    comments = text_list[0].get('value', 'No comments available.')

    return {"description": description, "comments": comments}
fetch_gene_annotation("TP53") |
结果如下
{'description': 'Cellular tumor antigen p53', | |
'comments': 'Multifunctional transcription factor that induces cell cycle arrest, DNA repair or apoptosis upon binding to its target DNA sequence (PubMed:11025664, PubMed:12524540, PubMed:12810724, PubMed:15186775, PubMed:15340061, PubMed:17317671, PubMed:17349958, PubMed:19556538, PubMed:20673990, PubMed:20959462, PubMed:22726440, PubMed:24051492, PubMed:24652652, PubMed:35618207, PubMed:36634798, PubMed:38653238, PubMed:9840937). Acts as a tumor suppressor in many tumor types; induces growth arrest or apoptosis depending on the physiological circumstances and cell type (PubMed:11025664, PubMed:12524540, PubMed:12810724, PubMed:15186775, PubMed:15340061, PubMed:17189187, PubMed:17317671, PubMed:17349958, PubMed:19556538, PubMed:20673990, PubMed:20959462, PubMed:22726440, PubMed:24051492, PubMed:24652652, PubMed:38653238, PubMed:9840937). Negatively regulates cell division by controlling expression of a set of genes required for this process (PubMed:11025664, PubMed:12524540, PubMed:12810724, PubMed:15186775, PubMed:15340061, PubMed:17317671, PubMed:17349958, PubMed:19556538, PubMed:20673990, PubMed:20959462, PubMed:22726440, PubMed:24051492, PubMed:24652652, PubMed:9840937). One of the activated genes is an inhibitor of cyclin-dependent kinases. Apoptosis induction seems to be mediated either by stimulation of BAX and FAS antigen expression, or by repression of Bcl-2 expression (PubMed:12524540, PubMed:17189187). Its pro-apoptotic activity is activated via its interaction with PPP1R13B/ASPP1 or TP53BP2/ASPP2 (PubMed:12524540). However, this activity is inhibited when the interaction with PPP1R13B/ASPP1 or TP53BP2/ASPP2 is displaced by PPP1R13L/iASPP (PubMed:12524540). In cooperation with mitochondrial PPIF is involved in activating oxidative stress-induced necrosis; the function is largely independent of transcription. Induces the transcription of long intergenic non-coding RNA p21 (lincRNA-p21) and lincRNA-Mkln1. 
LincRNA-p21 participates in TP53-dependent transcriptional repression leading to apoptosis and seems to have an effect on cell-cycle regulation. Implicated in Notch signaling cross-over. Prevents CDK7 kinase activity when associated to CAK complex in response to DNA damage, thus stopping cell cycle progression. Isoform 2 enhances the transactivation activity of isoform 1 from some but not all TP53-inducible promoters. Isoform 4 suppresses transactivation activity and impairs growth suppression mediated by isoform 1. Isoform 7 inhibits isoform 1-mediated apoptosis. Regulates the circadian clock by repressing CLOCK-BMAL1-mediated transcriptional activation of PER2 (PubMed:24051492)'} |
两种方法的结果并不相同,我更推荐使用 UniProt API,实测速度比使用 Entrez API 约快 0.7 倍,而且不需要注册账号,此外,其注释与 GeneCards 中是一致的。