{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tutorial 4: Generate images from the paper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# standard library: used below to locate the local database files\n",
    "import os\n",
    "# importing all useful classes from PyCoM\n",
    "from pycom import PyCom, ProteinParams, CoMAnalysis\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "# matplotlib\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "# 42 selects TrueType (Type 42) fonts for PDF output\n",
    "matplotlib.rcParams['pdf.fonttype'] = 42\n",
    "matplotlib.rcParams['font.family'] = \"sans-serif\"\n",
    "matplotlib.rcParams['font.sans-serif'] = \"Arial\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the path to the folder holding the local database files.\n",
    "# Override the default by setting the PYCOM_DB_DIR environment variable.\n",
    "database_folder_path = os.environ.get(\"PYCOM_DB_DIR\", \"/Volumes/mason/Work/Sarath/Research/pycom/\")\n",
    "file_matrix_db = os.path.join(database_folder_path, \"pycom.mat\")\n",
    "file_protein_db = os.path.join(database_folder_path, \"pycom.db\")\n",
    "# Use the local files only when both exist; otherwise the remote cell below is used,\n",
    "# so obj_pycom is created exactly once on a Restart & Run All.\n",
    "use_local_db = os.path.isfile(file_protein_db) and os.path.isfile(file_matrix_db)\n",
    "my_color = \"#6495ED\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Option A: local database files (taken when both files were found above)\n",
    "if use_local_db:\n",
    "    obj_pycom = PyCom(db_path=file_protein_db, mat_path=file_matrix_db)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Option B: remote mode (taken when the local files are not available)\n",
    "if not use_local_db:\n",
    "    obj_pycom = PyCom(remote=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Construct your query (it's empty as I want all information)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# An empty query passes no constraints, so find() returns all proteins in the database.\n",
    "query_parameters = {}\n",
    "# executing the query (next code cell) returns a pandas dataframe with information about all the proteins which match the query"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "jp-MarkdownHeadingCollapsed": true,
    "tags": []
   },
   "source": [
    "Executing the query and finding out dimensions of the dataframe:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/sdantu/Work/pyc_wspace/pycom/pycom/pycom/interface/_find_helper.py:19: UserWarning: No constraints were passed to find(). This will return all proteins in the database.\n",
      " warn('No constraints were passed to find(). This will return all proteins in the database.')\n"
     ]
    }
   ],
   "source": [
    "entries_data_frame = obj_pycom.find(query_parameters)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | neff | \n", "sequence_length | \n", "helix_frac | \n", "turn_frac | \n", "strand_frac | \n", "has_ptm | \n", "has_pdb | \n", "has_substrate | \n", "
---|---|---|---|---|---|---|---|---|
count | \n", "457622.000000 | \n", "457622.000000 | \n", "457622.000000 | \n", "457622.000000 | \n", "457622.000000 | \n", "457622.000000 | \n", "457622.000000 | \n", "457622.000000 | \n", "
mean | \n", "8.397407 | \n", "251.278734 | \n", "0.013926 | \n", "0.001262 | \n", "0.009162 | \n", "0.064923 | \n", "0.050238 | \n", "0.427045 | \n", "
std | \n", "2.498266 | \n", "124.627642 | \n", "0.076069 | \n", "0.008348 | \n", "0.052192 | \n", "0.246389 | \n", "0.218436 | \n", "0.494649 | \n", "
min | \n", "1.000000 | \n", "5.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
25% | \n", "6.928000 | \n", "147.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
50% | \n", "8.621000 | \n", "243.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
75% | \n", "10.176000 | \n", "351.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "
max | \n", "17.205000 | \n", "500.000000 | \n", "0.956522 | \n", "0.542857 | \n", "0.816901 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "