{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0-final" }, "orig_nbformat": 2, "kernelspec": { "name": "python3", "display_name": "Python 3.9.0 64-bit", "metadata": { "interpreter": { "hash": "36cf16204b8548560b1c020c4e8fb5b57f0e4c58016f52f2d4be01e192833930" } } } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: tqdm in /home/anson/.local/lib/python3.8/site-packages (4.59.0)\n" ] } ], "source": [ "!pip install tqdm" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "import requests as r\n", "import pandas as pd\n", "from fuzzywuzzy import fuzz\n", "from functools import cache\n", "from tqdm import tqdm" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ " def stocks():\n", "\n", " raw_symbols = r.get(\n", " f\"https://cloud.iexapis.com/stable/ref-data/symbols?token=sk_b3323ec3072e44c5acc414868bdd40ce\"\n", " ).json()\n", " symbols = pd.DataFrame(data=raw_symbols)\n", "\n", " symbols[\"description\"] = \"$\" + symbols[\"symbol\"] + \": \" + symbols[\"name\"]\n", " symbols[\"id\"] = symbols[\"symbol\"]\n", "\n", " symbols = symbols[[\"id\", \"symbol\", \"name\", \"description\"]]\n", "\n", " return symbols\n", "\n", "\n", "\n", " def coins():\n", "\n", " raw_symbols = r.get(\"https://api.coingecko.com/api/v3/coins/list\").json()\n", " symbols = pd.DataFrame(data=raw_symbols)\n", "\n", " symbols[\"description\"] = \"$$\" + symbols[\"symbol\"] + \": \" + symbols[\"name\"]\n", " symbols = symbols[[\"id\", \"symbol\", \"name\", \"description\"]]\n", "\n", " return symbols" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " id symbol \\\n", "0 A A \n", "1 AA AA \n", "2 AAA AAA \n", "3 AAAU AAAU \n", "4 AAC AAC \n", "... ... ... \n", "6565 zyro zyro \n", "6566 zytara-dollar zusd \n", "6567 zyx zyx \n", "6568 zzz-finance zzz \n", "6569 zzz-finance-v2 zzzv2 \n", "\n", " name \\\n", "0 Agilent Technologies Inc. \n", "1 Alcoa Corp \n", "2 Listed Funds Trust - AAF First Priority CLO Bo... \n", "3 Goldman Sachs Physical Gold ETF Shares - Goldm... \n", "4 Ares Acquisition Corporation - Class A \n", "... ... \n", "6565 Zyro \n", "6566 Zytara Dollar \n", "6567 ZYX \n", "6568 zzz.finance \n", "6569 zzz.finance v2 \n", "\n", " description \n", "0 $A: Agilent Technologies Inc. \n", "1 $AA: Alcoa Corp \n", "2 $AAA: Listed Funds Trust - AAF First Priority ... \n", "3 $AAAU: Goldman Sachs Physical Gold ETF Shares ... \n", "4 $AAC: Ares Acquisition Corporation - Class A \n", "... ... \n", "6565 $$zyro: Zyro \n", "6566 $$zusd: Zytara Dollar \n", "6567 $$zyx: ZYX \n", "6568 $$zzz: zzz.finance \n", "6569 $$zzzv2: zzz.finance v2 \n", "\n", "[16946 rows x 4 columns]" ], "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
idsymbolnamedescription
0AAAgilent Technologies Inc.$A: Agilent Technologies Inc.
1AAAAAlcoa Corp$AA: Alcoa Corp
2AAAAAAListed Funds Trust - AAF First Priority CLO Bo...$AAA: Listed Funds Trust - AAF First Priority ...
3AAAUAAAUGoldman Sachs Physical Gold ETF Shares - Goldm...$AAAU: Goldman Sachs Physical Gold ETF Shares ...
4AACAACAres Acquisition Corporation - Class A$AAC: Ares Acquisition Corporation - Class A
...............
6565zyrozyroZyro$$zyro: Zyro
6566zytara-dollarzusdZytara Dollar$$zusd: Zytara Dollar
6567zyxzyxZYX$$zyx: ZYX
6568zzz-financezzzzzz.finance$$zzz: zzz.finance
6569zzz-finance-v2zzzv2zzz.finance v2$$zzzv2: zzz.finance v2
\n

16946 rows × 4 columns

\n
" }, "metadata": {}, "execution_count": 51 } ], "source": [ "df = pd.concat([stocks(), coins()])\n", "df" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [], "source": [ " def search_symbols(search: str):\n", " \"\"\"Performs a fuzzy search to find stock symbols closest to a search term.\n", "\n", " Parameters\n", " ----------\n", " search : str\n", " String used to search, could be a company name or something close to the companies stock ticker.\n", "\n", " Returns\n", " -------\n", " List[tuple[str, str]]\n", " A list tuples of every stock sorted in order of how well they match. Each tuple contains: (Symbol, Issue Name).\n", " \"\"\"\n", "\n", " try:\n", " if search_index[search]: return search_index[search]\n", " except KeyError:\n", " pass\n", "\n", "\n", "\n", " search = search.lower()\n", "\n", " df[\"Match\"] = df.apply(\n", " lambda x: fuzz.ratio(search, f\"{x['symbol']}\".lower()),\n", " axis=1,\n", " )\n", "\n", " df.sort_values(by=\"Match\", ascending=False, inplace=True)\n", " if df[\"Match\"].head().sum() < 300:\n", " df[\"Match\"] = df.apply(\n", " lambda x: fuzz.partial_ratio(search, x[\"name\"].lower()),\n", " axis=1,\n", " )\n", "\n", " df.sort_values(by=\"Match\", ascending=False, inplace=True)\n", "\n", " symbols = df.head(20)\n", " symbol_list = list(zip(list(symbols[\"symbol\"]), list(symbols[\"description\"])))\n", " \n", " return symbol_list" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [], "source": [ "search_list = df['id'].to_list() + df['description'].to_list()\n", "search_index = {}" ] }, { "cell_type": "code", "execution_count": 92, "metadata": { "tags": [] }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ " 5%|▍ | 1545/33892 [06:51<2:23:40, 3.75it/s]\n" ] }, { "output_type": "error", "ename": "KeyboardInterrupt", "evalue": "", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtqdm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msearch_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0msearch_index\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msearch_symbols\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m\u001b[0m in \u001b[0;36msearch_symbols\u001b[0;34m(search)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0msearch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msearch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlower\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 24\u001b[0;31m df[\"Match\"] = df.apply(\n\u001b[0m\u001b[1;32m 25\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfuzz\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mratio\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msearch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34mf\"{x['symbol']}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlower\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.local/lib/python3.9/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mapply\u001b[0;34m(self, func, axis, raw, result_type, args, **kwds)\u001b[0m\n\u001b[1;32m 7763\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7764\u001b[0m )\n\u001b[0;32m-> 7765\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7766\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7767\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mna_action\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.local/lib/python3.9/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mget_result\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 185\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 186\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.local/lib/python3.9/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 274\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 275\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mapply_standard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 276\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mres_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 277\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[0;31m# wrap results\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.local/lib/python3.9/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0moption_context\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"mode.chained_assignment\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 288\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 289\u001b[0m \u001b[0;31m# ignore SettingWithCopy here in case the user mutates\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 290\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.local/lib/python3.9/site-packages/pandas/core/apply.py\u001b[0m in \u001b[0;36mseries_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 408\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 409\u001b[0m \u001b[0;31m# GH#35462 re-pin mgr in case setitem changed it\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 410\u001b[0;31m \u001b[0mser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_mgr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmgr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 411\u001b[0m \u001b[0mblk\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[0mser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.local/lib/python3.9/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m__setattr__\u001b[0;34m(self, name, value)\u001b[0m\n\u001b[1;32m 5473\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5474\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5475\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5476\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5477\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "\n", "for i in tqdm(search_list):\n", " search_index[i] = search_symbols(i)" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "import pickle\n", "\n", "\n", "\n", "with open('search_index.pickle', 'wb') as handle:\n", " pickle.dump(search_index, handle, protocol=pickle.HIGHEST_PROTOCOL)\n", "\n", "# with open('filename.pickle', 'rb') as handle:\n", "# b = pickle.load(handle)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ] }