# Track length in seconds, derived from the millisecond duration.
itunes_df['Seconds'] = itunes_df['Milliseconds'] / 1000
# Milliseconds of audio per byte of file size.
itunes_df['len_byte_ratio'] = itunes_df['Milliseconds'] / itunes_df['Bytes']
# Lookup that maps variant genre spellings onto the canonical label.
genre_dict = dict(metal='Metal', met='Metal')
# NOTE(review): neither result below is assigned back, so the 'Genre' column
# itself is left unchanged -- confirm these are meant as display-only demos.
itunes_df['Genre'].replace(to_replace=genre_dict)
itunes_df['Genre'].apply(lambda genre: genre.lower())
# the above is the same as this
def lowercase(x: str) -> str:
    """Return a lowercased copy of the string ``x``.

    Named equivalent of ``lambda x: x.lower()``, intended to be passed to
    ``Series.apply``.
    """
    return x.lower()
# call the named function on every value of the 'Genre' column
# (the result is not assigned, so the column itself is unchanged)
itunes_df['Genre'].apply(lowercase)
# but using built-in functions is almost always faster
itunes_df['Genre'].str.lower()
# this is a common sentiment analysis library; polarity is positive/negative sentiment,
# subjectivity is subjective/objective rating.
from textblob import TextBlob
# build a TextBlob from a sample sentence, then inspect its sentiment
test = TextBlob("Textblob is amazingly simple to use. What great fun!")
# the full sentiment result for the sample sentence
test.sentiment
# just the polarity component of that result
test.sentiment.polarity
# A list comprehension is preferable to apply here: score the sentiment
# polarity of every track name and store it as a new column.
itunes_df['Track_sentiment'] = [
    TextBlob(track_name).sentiment.polarity
    for track_name in itunes_df['Track']
]
# but, if we wanted to mix polarity and subjectivity into one column, it would be best to use apply:
def pol_sub_mix(x):
    """Return polarity multiplied by subjectivity for the text ``x``.

    ``x`` is wrapped in a ``TextBlob`` and the blob's ``polarity`` and
    ``subjectivity`` attributes are multiplied into a single score.
    """
    tb = TextBlob(x)
    return tb.polarity * tb.subjectivity
# Score every track name with the combined polarity/subjectivity function.
itunes_df['Track_pol_sub_mix'] = itunes_df['Track'].apply(pol_sub_mix)
# Remove both sentiment columns again.
itunes_df.drop(
    columns=['Track_pol_sub_mix', 'Track_sentiment'],
    inplace=True,
)
# currently doesn't work with python 3.9
import swifter
# same lowercase transform as the plain apply, routed through the .swifter accessor
# (the result is not assigned, so the column itself is unchanged)
itunes_df['Genre'].swifter.apply(lambda x: x.lower())
# Persist the cleaned table; index=False leaves the row index out of the CSV.
itunes_df.to_csv('cleaned_itunes_data.csv', index=False)
# Five genres with the shortest mean track length. 'Seconds' is selected
# before aggregating so only that column is averaged: calling .mean() on the
# whole grouped frame also tries to average string columns such as 'Track',
# which raises TypeError on pandas >= 2.0 (and is wasted work on older versions).
itunes_df.groupby('Genre')['Seconds'].mean().sort_values().head()
# Load the raw bitcoin price data and preview it.
btc_df = pd.read_csv('bitcoin_price.csv')
btc_df.head()

# Inspect the distinct values of 'symbol', then remove the column.
btc_df['symbol'].unique()
btc_df.drop(columns=['symbol'], inplace=True)

# 'time' is interpreted as epoch milliseconds and converted to datetimes.
btc_df['time'] = pd.to_datetime(btc_df['time'], unit='ms')
btc_df['time'].dtype
btc_df.info()

# Use the timestamps as the row index.
btc_df = btc_df.set_index('time')
btc_df.head()
# Closing price over the whole data set, on a log-scaled y-axis.
btc_df[['close']].plot(logy=True)

# Closing price for the last 3000 rows, saved to disk.
# The axes of `f` are passed explicitly: without `ax`, DataFrame.plot draws on
# a brand-new figure, which would leave `f` empty and apply the facecolor
# below to a figure that is never saved.
f = plt.figure(figsize=(5.5, 5.5))
btc_df.iloc[-3000:][['close']].plot(logy=True, ax=f.gca())
f.patch.set_facecolor('w')  # sets background color behind axis labels
plt.tight_layout()  # auto-adjust margins
plt.savefig('B17030_04_11.png', dpi=300)
def date_parser(x):
    """Convert epoch-millisecond value(s) ``x`` to pandas datetimes."""
    return pd.to_datetime(x, unit='ms')


# The 'time' column holds epoch milliseconds. The read_csv arguments
# `infer_datetime_format` and `date_parser` are deprecated since pandas 2.0,
# so the raw values are loaded as the index and converted explicitly; this
# behaves the same on old and new pandas versions.
btc_df2 = pd.read_csv('bitcoin_price.csv', index_col='time')
btc_df2.index = date_parser(btc_df2.index)
btc_df2.head()
# Partial string indexing on the datetime index: all rows from 2019.
btc_df.loc['2019']
# No comments:
# Post a Comment