Simple example of how to use Python to get data from the Ethereum blockchain.
- tokens.ipynb: Jupyter notebook with the code used (Python)
- tokens.csv (input file): List of tokens to analyze
- ./datasets/balances.csv (output file): Matrix of token balances, with one row per address and one column per token
- ./datasets/{TOKEN_NAME}.csv (output file): Raw data of the user transactions with the token, as returned by Etherscan
from urllib2 import Request, urlopen, URLError
import pandas as pd
import numpy as np
import json
# Etherscan API key: paste your own key between the quotes before running
APIKEY = str()
# Token list to analyze; tokens.csv must live in the same directory as this
# notebook. The first line of the file is used as the column header.
tokens = pd.read_csv("tokens.csv", header=0, index_col=False)
# Per-project transaction tables, keyed by project name and filled by getTxs
tokenTxs = dict()
def getTxs(token):
    """Fetch the normal transactions of one token address from Etherscan.

    On success the transactions are stored as a DataFrame in the
    module-level ``tokenTxs`` dict under ``token["Project"]``. Nothing is
    stored when the request fails or when the API answer does not carry a
    list of transactions; a message is printed instead.

    Args:
        token: Row of the ``tokens`` table; must provide the "Address" and
            "Project" fields.

    Returns:
        None. The result is published through ``tokenTxs``.
    """
    # Will look only for normal txs
    url = "http://api.etherscan.io/api?module=account&action=txlist&address={}&apikey={}".format(token["Address"], APIKEY)
    try:
        response = urlopen(Request(url))
        try:
            txs = response.read()
        finally:
            # Release the connection even when reading the body fails.
            response.close()
    except URLError as e:
        print('API error. Got an error code: {}'.format(e))
        return
    result = json.loads(txs)["result"]
    if not isinstance(result, list):
        # NOTE(review): per the Etherscan docs, failed calls (bad key, rate
        # limit, ...) put a message string in "result" instead of a list of
        # rows; such a payload cannot be turned into a transaction table.
        print('API error. Unexpected result for {}: {}'.format(token["Project"], result))
        return
    # Round-trip through JSON text so pandas applies its read_json type
    # inference to the records.
    tokenTxs[token["Project"]] = pd.read_json(json.dumps(result), orient='records')
# Call getTxs once per row of the tokens table (axis=1 hands each row to the
# function); getTxs stores every result in tokenTxs.
tokens.apply(getTxs, axis=1)
# Sample data: first rows of the transactions stored under "Acade City"
tokenTxs["Acade City"].head()
|
blockHash |
blockNumber |
confirmations |
contractAddress |
cumulativeGasUsed |
from |
gas |
gasPrice |
gasUsed |
hash |
input |
isError |
nonce |
timeStamp |
to |
transactionIndex |
value |
0 |
0x5fd4ccfbc4fd08790bec564d3941bdd39a59fc242fc7... |
2541610 |
798691 |
|
419262 |
0x5f23acdd1e87112b5fe143509d74ded22b6e59b3 |
400000 |
21800903077 |
87290 |
0x92089c7ccbcb0e190e4d87e3158c0572e066f22ab53a... |
0x7d124a0200000000000000000000000008e50ae3e83f... |
0 |
15 |
2016-10-31 14:38:13 |
0xac709fcb44a43c35f0da4e3163b117a17f3770f5 |
1 |
0.000000e+00 |
1 |
0xacf96f6ef0fcd8878ed90705f3b35a01d36f41a73ffb... |
2541693 |
798608 |
|
205240 |
0xb6f2af0b3551161fe95f18219a8d402fc4e0233b |
100000 |
21000000000 |
100000 |
0xf64a321a017ddf84a0daa9aecdf3353c0a5b1369822f... |
0x |
1 |
27 |
2016-10-31 14:58:24 |
0xac709fcb44a43c35f0da4e3163b117a17f3770f5 |
5 |
1.000000e+17 |
2 |
0x12aa0744a148380816466e7a8b7f7302bcd3971a998c... |
2541866 |
798435 |
|
105000 |
0x5d61433e4dbd2e6a44c62846a7ef3a1d4cd256b3 |
21000 |
21000000000 |
21000 |
0xdcdbed18849b1b7603c0fa5075a037cf48971ac9ce1c... |
0x |
1 |
0 |
2016-10-31 15:43:16 |
0xac709fcb44a43c35f0da4e3163b117a17f3770f5 |
4 |
1.000000e+12 |
3 |
0x9ea42e81a52662618b847314d253bbb3fc7bef219f0d... |
2542668 |
797633 |
|
121000 |
0xcecafbdbbb5d5baf57844a6611e36fc781aad017 |
100000 |
21000000000 |
100000 |
0x934743bcc99b9e211ff607407740e5eebd3255283a33... |
0x |
1 |
0 |
2016-10-31 18:52:16 |
0xac709fcb44a43c35f0da4e3163b117a17f3770f5 |
1 |
6.000000e+18 |
4 |
0x6d29e46ae11e4c5cef7f0ea2580993fd6451ce5a5ec7... |
2543686 |
796615 |
|
358785 |
0x6cfabd40891abe610efd0cc0cfb8a2f2209ea68d |
100000 |
26000000000 |
100000 |
0xb369fac72065e37a74712f8c4375f5d1deea9b735c01... |
0x |
1 |
0 |
2016-10-31 22:58:32 |
0xac709fcb44a43c35f0da4e3163b117a17f3770f5 |
10 |
4.000000e+18 |
# Remove duplicates
# Distinct sender ("from") addresses of every token, one array per token.
sendersPerToken = [np.unique(txs["from"]) for txs in tokenTxs.values()]
# Join them in a single call instead of growing the array inside a loop;
# np.concatenate rejects an empty list, hence the explicit empty fallback.
uniqueAddresses = np.concatenate(sendersPerToken) if sendersPerToken else np.array([])
print("Number of user accounts (with duplicates): {}".format(len(uniqueAddresses)))
# The same address can appear under several tokens; keep each one once.
uniqueAddresses = np.unique(uniqueAddresses)
print("Number of user accounts (after removing duplicates): {}".format(len(uniqueAddresses)))
Number of user accounts (with duplicates): 17362
Number of user accounts (after removing duplicates): 15286
# Uncomment to install web3
#!pip install web3
from web3 import Web3, KeepAliveRPCProvider, IPCProvider
# Connect to an Ethereum node. The resulting `web3` client is the object
# setBalances uses to build the token contracts.
# Note that you should create only one RPCProvider per
# process, as it recycles the underlying TCP/IP network connections between
# your process and the Ethereum node
# Alternative connection through host/port (uncomment to use it):
#web3 = Web3(KeepAliveRPCProvider(host='localhost', port='8545'))
# or for an IPC based connection
# NOTE(review): IPCProvider() is given no path here, so it presumably falls
# back to the library's default IPC location -- confirm for your node setup.
web3 = Web3(IPCProvider())
# ERC20 standard ABI, parsed from JSON: the functions approve, totalSupply,
# transferFrom, balanceOf, transfer and allowance, plus the Transfer and
# Approval events
abi = json.loads('[{"constant":false,"inputs":[{"name":"_spender","type":"address"},{"name":"_value","type":"uint256"}],"name":"approve","outputs":[{"name":"success","type":"bool"}],"payable":false,"type":"function"},{"constant":true,"inputs":[],"name":"totalSupply","outputs":[{"name":"","type":"uint256"}],"payable":false,"type":"function"},{"constant":false,"inputs":[{"name":"_from","type":"address"},{"name":"_to","type":"address"},{"name":"_value","type":"uint256"}],"name":"transferFrom","outputs":[{"name":"success","type":"bool"}],"payable":false,"type":"function"},{"constant":true,"inputs":[{"name":"_owner","type":"address"}],"name":"balanceOf","outputs":[{"name":"balance","type":"uint256"}],"payable":false,"type":"function"},{"constant":false,"inputs":[{"name":"_to","type":"address"},{"name":"_value","type":"uint256"}],"name":"transfer","outputs":[{"name":"success","type":"bool"}],"payable":false,"type":"function"},{"constant":true,"inputs":[{"name":"_owner","type":"address"},{"name":"_spender","type":"address"}],"name":"allowance","outputs":[{"name":"remaining","type":"uint256"}],"payable":false,"type":"function"},{"anonymous":false,"inputs":[{"indexed":true,"name":"_from","type":"address"},{"indexed":true,"name":"_to","type":"address"},{"indexed":false,"name":"_value","type":"uint256"}],"name":"Transfer","type":"event"},{"anonymous":false,"inputs":[{"indexed":true,"name":"_owner","type":"address"},{"indexed":true,"name":"_spender","type":"address"},{"indexed":false,"name":"_value","type":"uint256"}],"name":"Approval","type":"event"}]')
# Balance matrix: one row per unique address and one column per token
# project. Every cell starts as "" and is filled in by setBalances.
balances = pd.DataFrame("", index=uniqueAddresses, columns=tokens["Project"].values)
def setBalances(token):
    """Fill one column of ``balances`` with the token balance of each address.

    Builds the token contract from the module-level ``abi`` and the token's
    address, calls ``balanceOf`` for every entry of the module-level
    ``uniqueAddresses`` and writes each answer into the module-level
    ``balances`` frame at (address, token["Project"]).

    Args:
        token: Row of the ``tokens`` table; must provide the "Address" and
            "Project" fields.

    Returns:
        None. The result is published through ``balances``.
    """
    tokenContract = web3.eth.contract(
        abi=abi,
        address=token["Address"]
    )
    # The column label is the same for every address, so look it up once.
    project = token["Project"]
    for address in uniqueAddresses:
        try:
            # .at is the label-based scalar setter that supersedes the
            # deprecated DataFrame.set_value.
            balances.at[address, project] = tokenContract.call().balanceOf(address)
        except URLError as e:
            # Report the failure and carry on with the next address.
            print('Web3 error. Got an error code: {}'.format(e))
# Call setBalances once per row of the tokens table (axis=1 hands each row to
# the function); each call fills that token's column of balances.
tokens.apply(setBalances, axis=1)
# Sample data: first rows of the balance matrix
balances.head()
|
Augur |
ICONOMI |
Golem |
Digix |
Pluton (Plutus) |
SingularDTV |
First Blood |
VSice (VDlice) |
Hacker Gold (Ether.camp) |
Maker DAO |
Chrono Bank |
Unicorns (Ethereum Fundation) |
Xaurum |
Acade City |
Swarm City |
Bitpark Coin |
Round |
0x00004aba4ac63de11447e4e17aca83f0abb1fc33 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0x000118f3bd5a727f663c85c671370760c7730927 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0x0001aebe0b48bbf1cee8df2c0dfd7c2031543859 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0x0001fe7648a2c144becdf9f17f0055315a519f86 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
134349877 |
0 |
0 |
0 |
0 |
0 |
0 |
0x000313efbb302549f83e35e50bf0a4e3f0a639af |
0 |
0 |
2262127659574400000 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
import os

# All CSV output goes into ./datasets; create the directory up front so the
# writes below do not fail when it is missing.
if not os.path.isdir("./datasets"):
    os.makedirs("./datasets")
# Raw data: one CSV per token project with its transactions
for project, txs in tokenTxs.items():
    txs.to_csv("./datasets/{}.csv".format(project), sep=',')
# Balances: the address x token balance matrix
balances.to_csv("./datasets/balances.csv", sep=',')