In [2]:
# Pandas for managing datasets
import pandas as pd

# Display floats with 2 decimal places
pd.options.display.float_format = '{:,.2f}'.format

# Expand display limits
pd.options.display.max_rows = 200
pd.options.display.max_columns = 100

# Read BNC2 sample dataset
df = pd.read_csv('BNC2_sample.csv',
                 names=['Code', 'Date', 'Open', 'High', 'Low', 
                        'Close', 'Volume', 'VWAP', 'TWAP'])
 
# Display first 5 observations
df.head()
Out[2]:
Code Date Open High Low Close Volume VWAP TWAP
0 GWA_BTC 2014-04-01 467.28 488.62 467.28 479.56 74,776.48 482.76 482.82
1 GWA_BTC 2014-04-02 479.20 494.30 431.32 437.08 114,052.96 460.19 465.93
2 GWA_BTC 2014-04-03 437.33 449.74 414.41 445.60 91,415.08 432.29 433.28
3 GWA_BTC 2014-04-04 445.18 456.10 429.16 449.81 51,147.27 443.46 443.93
4 GWA_BTC 2014-04-05 450.08 464.09 445.16 461.70 28,449.19 452.53 452.95
In [3]:
# Unique codes in the dataset
print( df.Code.unique() )
['GWA_BTC' 'GWA_ETH' 'GWA_LTC' 'GWA_XLM' 'GWA_XRP' 'MWA_BTC_CNY'
 'MWA_BTC_EUR' 'MWA_BTC_GBP' 'MWA_BTC_JPY' 'MWA_BTC_USD' 'MWA_ETH_CNY'
 'MWA_ETH_EUR' 'MWA_ETH_GBP' 'MWA_ETH_JPY' 'MWA_ETH_USD' 'MWA_LTC_CNY'
 'MWA_LTC_EUR' 'MWA_LTC_GBP' 'MWA_LTC_JPY' 'MWA_LTC_USD' 'MWA_XLM_CNY'
 'MWA_XLM_EUR' 'MWA_XLM_USD' 'MWA_XRP_CNY' 'MWA_XRP_EUR' 'MWA_XRP_GBP'
 'MWA_XRP_JPY' 'MWA_XRP_USD']
In [4]:
# Example of GWA and MWA relationship
df[df.Code.isin(['GWA_BTC', 'MWA_BTC_JPY', 'MWA_BTC_EUR']) & (df.Date == '2018-01-01')]
Out[4]:
Code Date Open High Low Close Volume VWAP TWAP
1371 GWA_BTC 2018-01-01 14,505.89 14,505.89 13,617.46 14,092.74 225,906.21 14,103.18 14,093.73
9074 MWA_BTC_EUR 2018-01-01 11,859.35 11,859.35 11,111.07 11,403.92 14,933.73 11,488.45 11,478.08
11838 MWA_BTC_JPY 2018-01-01 1,674,341.45 1,678,567.55 1,572,173.90 1,632,657.51 68,611.95 1,632,994.40 1,631,407.66
In [5]:
# Number of observations in dataset
print( 'Before:', len(df) )
 
# Get all the GWA codes
gwa_codes = [code for code in df.Code.unique() if 'GWA_' in code]
 
# Only keep GWA observations
df = df[df.Code.isin(gwa_codes)]
 
# Number of observations left
print( 'After:', len(df) )
Before: 31761
After: 6309
In [6]:
# Pivot dataset
pivoted_df = df.pivot(index='Date', columns='Code', values='VWAP')
 
# Display examples from pivoted dataset
pivoted_df.tail()
Out[6]:
Code GWA_BTC GWA_ETH GWA_LTC GWA_XLM GWA_XRP
Date
2018-01-19 11,826.36 1,068.45 195.00 0.51 1.82
2018-01-20 13,062.68 1,158.71 207.58 0.52 1.75
2018-01-21 12,326.23 1,108.90 197.36 0.48 1.55
2018-01-22 11,397.52 1,038.21 184.92 0.47 1.43
2018-01-23 10,921.00 992.05 176.95 0.47 1.42
In [8]:
print( pivoted_df.tail(3) )
Code         GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
Date                                                    
2018-01-21 12,326.23 1,108.90   197.36     0.48     1.55
2018-01-22 11,397.52 1,038.21   184.92     0.47     1.43
2018-01-23 10,921.00   992.05   176.95     0.47     1.42
In [9]:
print( pivoted_df.tail(3).shift(1) )
Code         GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
Date                                                    
2018-01-21       nan      nan      nan      nan      nan
2018-01-22 12,326.23 1,108.90   197.36     0.48     1.55
2018-01-23 11,397.52 1,038.21   184.92     0.47     1.43
In [10]:
# Calculate returns over 7 days prior
delta_7 = pivoted_df / pivoted_df.shift(7) - 1.0
 
# Display examples
delta_7.tail()
Out[10]:
Code GWA_BTC GWA_ETH GWA_LTC GWA_XLM GWA_XRP
Date
2018-01-19 -0.18 -0.17 -0.18 -0.21 -0.22
2018-01-20 -0.13 -0.19 -0.18 -0.23 -0.29
2018-01-21 -0.15 -0.20 -0.22 -0.22 -0.30
2018-01-22 -0.21 -0.24 -0.24 -0.25 -0.32
2018-01-23 -0.11 -0.12 -0.13 -0.02 -0.04
In [12]:
# Calculate returns over each window and store them in dictionary
delta_dict = {}
for offset in [7, 14, 21, 28]:
    delta_dict['delta_{}'.format(offset)] = pivoted_df / pivoted_df.shift(offset) - 1.0

# Display result "delta_dict"
delta_dict
Out[12]:
{'delta_7': Code        GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
 Date                                                   
 2014-04-01      nan      nan      nan      nan      nan
 2014-04-02      nan      nan      nan      nan      nan
 2014-04-03      nan      nan      nan      nan      nan
 2014-04-04      nan      nan      nan      nan      nan
 2014-04-05      nan      nan      nan      nan      nan
 ...             ...      ...      ...      ...      ...
 2018-01-19    -0.18    -0.17    -0.18    -0.21    -0.22
 2018-01-20    -0.13    -0.19    -0.18    -0.23    -0.29
 2018-01-21    -0.15    -0.20    -0.22    -0.22    -0.30
 2018-01-22    -0.21    -0.24    -0.24    -0.25    -0.32
 2018-01-23    -0.11    -0.12    -0.13    -0.02    -0.04
 
 [1394 rows x 5 columns],
 'delta_14': Code        GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
 Date                                                   
 2014-04-01      nan      nan      nan      nan      nan
 2014-04-02      nan      nan      nan      nan      nan
 2014-04-03      nan      nan      nan      nan      nan
 2014-04-04      nan      nan      nan      nan      nan
 2014-04-05      nan      nan      nan      nan      nan
 ...             ...      ...      ...      ...      ...
 2018-01-19    -0.29     0.05    -0.23    -0.27    -0.41
 2018-01-20    -0.26     0.13    -0.29    -0.26    -0.42
 2018-01-21    -0.29    -0.01    -0.32    -0.31    -0.51
 2018-01-22    -0.29    -0.13    -0.30    -0.28    -0.52
 2018-01-23    -0.31    -0.22    -0.32    -0.24    -0.48
 
 [1394 rows x 5 columns],
 'delta_21': Code        GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
 Date                                                   
 2014-04-01      nan      nan      nan      nan      nan
 2014-04-02      nan      nan      nan      nan      nan
 2014-04-03      nan      nan      nan      nan      nan
 2014-04-04      nan      nan      nan      nan      nan
 2014-04-05      nan      nan      nan      nan      nan
 ...             ...      ...      ...      ...      ...
 2018-01-19    -0.22     0.42    -0.24     0.88     0.02
 2018-01-20    -0.05     0.60    -0.09     0.57    -0.26
 2018-01-21    -0.11     0.51    -0.12     0.47    -0.28
 2018-01-22    -0.19     0.36    -0.19     0.05    -0.35
 2018-01-23    -0.25     0.13    -0.29    -0.10    -0.39
 
 [1394 rows x 5 columns],
 'delta_28': Code        GWA_BTC  GWA_ETH  GWA_LTC  GWA_XLM  GWA_XRP
 Date                                                   
 2014-04-01      nan      nan      nan      nan      nan
 2014-04-02      nan      nan      nan      nan      nan
 2014-04-03      nan      nan      nan      nan      nan
 2014-04-04      nan      nan      nan      nan      nan
 2014-04-05      nan      nan      nan      nan      nan
 ...             ...      ...      ...      ...      ...
 2018-01-19    -0.17     0.57    -0.24     1.30     0.65
 2018-01-20    -0.13     0.60    -0.27     1.23     0.53
 2018-01-21    -0.12     0.65    -0.27     1.22     0.45
 2018-01-22    -0.20     0.38    -0.33     1.09     0.30
 2018-01-23    -0.31     0.29    -0.37     1.16     0.29
 
 [1394 rows x 5 columns]}
In [14]:
# Melt delta_7 returns
melted_7 = delta_7.reset_index().melt(id_vars=['Date'], value_name='delta_7')
 
# Melted dataframe examples
melted_7.tail()
Out[14]:
Date Code delta_7
6965 2018-01-19 GWA_XRP -0.22
6966 2018-01-20 GWA_XRP -0.29
6967 2018-01-21 GWA_XRP -0.30
6968 2018-01-22 GWA_XRP -0.32
6969 2018-01-23 GWA_XRP -0.04
In [16]:
# Melt all the delta dataframes and store in list
melted_dfs = []
for key, delta_df in delta_dict.items():
    melted_dfs.append( delta_df.reset_index().melt(id_vars=['Date'], value_name=key) )

melted_dfs
Out[16]:
[            Date     Code  delta_7
 0     2014-04-01  GWA_BTC      nan
 1     2014-04-02  GWA_BTC      nan
 2     2014-04-03  GWA_BTC      nan
 3     2014-04-04  GWA_BTC      nan
 4     2014-04-05  GWA_BTC      nan
 ...          ...      ...      ...
 6965  2018-01-19  GWA_XRP    -0.22
 6966  2018-01-20  GWA_XRP    -0.29
 6967  2018-01-21  GWA_XRP    -0.30
 6968  2018-01-22  GWA_XRP    -0.32
 6969  2018-01-23  GWA_XRP    -0.04
 
 [6970 rows x 3 columns],             Date     Code  delta_14
 0     2014-04-01  GWA_BTC       nan
 1     2014-04-02  GWA_BTC       nan
 2     2014-04-03  GWA_BTC       nan
 3     2014-04-04  GWA_BTC       nan
 4     2014-04-05  GWA_BTC       nan
 ...          ...      ...       ...
 6965  2018-01-19  GWA_XRP     -0.41
 6966  2018-01-20  GWA_XRP     -0.42
 6967  2018-01-21  GWA_XRP     -0.51
 6968  2018-01-22  GWA_XRP     -0.52
 6969  2018-01-23  GWA_XRP     -0.48
 
 [6970 rows x 3 columns],             Date     Code  delta_21
 0     2014-04-01  GWA_BTC       nan
 1     2014-04-02  GWA_BTC       nan
 2     2014-04-03  GWA_BTC       nan
 3     2014-04-04  GWA_BTC       nan
 4     2014-04-05  GWA_BTC       nan
 ...          ...      ...       ...
 6965  2018-01-19  GWA_XRP      0.02
 6966  2018-01-20  GWA_XRP     -0.26
 6967  2018-01-21  GWA_XRP     -0.28
 6968  2018-01-22  GWA_XRP     -0.35
 6969  2018-01-23  GWA_XRP     -0.39
 
 [6970 rows x 3 columns],             Date     Code  delta_28
 0     2014-04-01  GWA_BTC       nan
 1     2014-04-02  GWA_BTC       nan
 2     2014-04-03  GWA_BTC       nan
 3     2014-04-04  GWA_BTC       nan
 4     2014-04-05  GWA_BTC       nan
 ...          ...      ...       ...
 6965  2018-01-19  GWA_XRP      0.65
 6966  2018-01-20  GWA_XRP      0.53
 6967  2018-01-21  GWA_XRP      0.45
 6968  2018-01-22  GWA_XRP      0.30
 6969  2018-01-23  GWA_XRP      0.29
 
 [6970 rows x 3 columns]]
In [19]:
# Calculate 7-day returns after the date
return_df = pivoted_df.shift(-7) / pivoted_df - 1.0
 
# Melt the return dataset and append to list
melted_dfs.append( return_df.reset_index().melt(id_vars=['Date'], value_name='return_7') )
In [20]:
# Merge two dataframes
pd.merge(melted_dfs[0], melted_dfs[1], on=['Date', 'Code']).tail()
Out[20]:
Date Code delta_7 delta_14
6965 2018-01-19 GWA_XRP -0.22 -0.41
6966 2018-01-20 GWA_XRP -0.29 -0.42
6967 2018-01-21 GWA_XRP -0.30 -0.51
6968 2018-01-22 GWA_XRP -0.32 -0.52
6969 2018-01-23 GWA_XRP -0.04 -0.48
In [21]:
from functools import reduce
In [24]:
# Grab features from original dataset
base_df = df[['Date', 'Code', 'Volume', 'VWAP']]
 
# Create a list with all the feature dataframes
feature_dfs = [base_df] + melted_dfs
In [25]:
# Reduce-merge features into analytical base table
abt = reduce(lambda left,right: pd.merge(left,right,on=['Date', 'Code']), feature_dfs)
 
# Display examples from the ABT
abt.tail(10)
Out[25]:
Date Code Volume VWAP delta_7 delta_14 delta_21 delta_28 return_7_x return_7_y
6299 2018-01-14 GWA_XRP 912,107,674.18 2.20 -0.31 0.02 1.06 1.97 -0.30 -0.30
6300 2018-01-15 GWA_XRP 823,491,754.55 2.11 -0.29 -0.04 0.92 1.84 -0.32 -0.32
6301 2018-01-16 GWA_XRP 3,872,977,355.95 1.48 -0.46 -0.36 0.35 0.85 -0.04 -0.04
6302 2018-01-17 GWA_XRP 5,111,390,628.85 1.20 -0.47 -0.57 -0.06 0.58 nan nan
6303 2018-01-18 GWA_XRP 5,156,172,462.44 1.68 -0.22 -0.51 0.23 0.59 nan nan
6304 2018-01-19 GWA_XRP 2,126,239,927.56 1.82 -0.22 -0.41 0.02 0.65 nan nan
6305 2018-01-20 GWA_XRP 1,346,913,296.52 1.75 -0.29 -0.42 -0.26 0.53 nan nan
6306 2018-01-21 GWA_XRP 1,886,060,450.81 1.55 -0.30 -0.51 -0.28 0.45 nan nan
6307 2018-01-22 GWA_XRP 1,784,992,299.63 1.43 -0.32 -0.52 -0.35 0.30 nan nan
6308 2018-01-23 GWA_XRP 2,118,335,564.32 1.42 -0.04 -0.48 -0.39 0.29 nan nan
In [26]:
# Data from Sept 1st, 2017
abt[abt.Date == '2017-09-01']
Out[26]:
Date Code Volume VWAP delta_7 delta_14 delta_21 delta_28 return_7_x return_7_y
1249 2017-09-01 GWA_BTC 275,034.79 4,798.06 0.10 0.12 0.35 0.69 -0.09 -0.09
2149 2017-09-01 GWA_ETH 2,076,778.42 387.55 0.17 0.28 0.29 0.72 -0.21 -0.21
3543 2017-09-01 GWA_LTC 18,553,463.67 78.76 0.55 0.71 0.68 0.82 -0.10 -0.10
4770 2017-09-01 GWA_XLM 372,143,342.95 0.02 0.19 0.34 0.05 0.10 -0.19 -0.19
6164 2017-09-01 GWA_XRP 1,138,500,431.07 0.25 0.15 0.57 0.39 0.44 -0.14 -0.14
In [30]:
max_momentum_id = abt[abt.Date == '2017-09-01'].delta_28.idxmax()
abt.loc[max_momentum_id, ['Code','return_7']]
Out[30]:
Code        GWA_LTC
return_7        NaN
Name: 3543, dtype: object
In [31]:
# Create 'month' feature
abt['month'] = abt.Date.apply(lambda x: x[:7])
 
# Group by 'Code' and 'month' and keep first date
gb_df = abt.groupby(['Code', 'month']).first().reset_index()
 
# Display examples
gb_df.tail()
Out[31]:
Code month Date Volume VWAP delta_7 delta_14 delta_21 delta_28 return_7_x return_7_y
204 GWA_XRP 2017-09 2017-09-01 1,138,500,431.07 0.25 0.15 0.57 0.39 0.44 -0.14 -0.14
205 GWA_XRP 2017-10 2017-10-01 242,650,661.68 0.20 0.12 0.12 -0.05 -0.13 0.31 0.31
206 GWA_XRP 2017-11 2017-11-01 344,634,241.50 0.20 -0.03 -0.11 -0.25 -0.05 0.07 0.07
207 GWA_XRP 2017-12 2017-12-01 705,807,839.14 0.25 0.03 0.09 0.17 0.19 -0.00 -0.00
208 GWA_XRP 2018-01 2018-01-01 813,772,647.20 2.20 1.01 1.97 7.96 7.77 0.35 0.35