Edward Lance Lorilla

# Track length in seconds, derived from the millisecond column.
itunes_df['Seconds'] = itunes_df['Milliseconds'].div(1000)

# Milliseconds of audio per byte of file size.
itunes_df['len_byte_ratio'] = itunes_df['Milliseconds'].div(itunes_df['Bytes'])

# Both misspelled variants map onto the canonical 'Metal' label.
genre_dict = dict.fromkeys(('metal', 'met'), 'Metal')

# NOTE: replace() returns a new Series; the result is only displayed here,
# it is not written back to itunes_df.
itunes_df['Genre'].replace(genre_dict)

# Element-wise lowercase with an anonymous function (result also not stored).
itunes_df['Genre'].apply(lambda x: x.lower())

# the above is the same as this

def lowercase(x: str) -> str:
    """Return *x* converted to lowercase.

    Named equivalent of ``lambda x: x.lower()``, intended to be passed to
    ``Series.apply``.
    """
    return x.lower()

# Same element-wise lowercase as the lambda version, but through the named
# helper; the resulting Series is not assigned anywhere.
itunes_df['Genre'].apply(lowercase)

# but using built-in functions is almost always faster
# (here: the .str accessor's lower() instead of apply with a Python callable)

itunes_df['Genre'].str.lower()

# this is a common sentiment analysis library; polarity is positive/negative sentiment,

# subjectivity is a subjective/objective rating.

from textblob import TextBlob

# Sample blob used to inspect what the sentiment result looks like.
test = TextBlob("Textblob is amazingly simple to use. What great fun!")

# Full sentiment result, then just its polarity component
# (both are displayed only, nothing is stored).
test.sentiment
test.sentiment.polarity

# it would be better than apply to use a list comprehension to get sentiment of track names, like this
itunes_df['Track_sentiment'] = [
    TextBlob(title).sentiment.polarity
    for title in itunes_df['Track']
]

# but, if we wanted to mix polarity and subjectivity into one column, it would be best to use apply:

def pol_sub_mix(x):
    """Return the polarity of text *x* multiplied by its subjectivity.

    Both scores are read from a ``TextBlob`` built from *x*; the function is
    meant to be passed to ``Series.apply`` on a column of strings.
    """
    tb = TextBlob(x)
    return tb.polarity * tb.subjectivity

# Combined polarity * subjectivity score for every track name.
itunes_df['Track_pol_sub_mix'] = itunes_df['Track'].apply(pol_sub_mix)

# delete these columns (they were only needed for the demonstration)
itunes_df.drop(
    columns=['Track_pol_sub_mix', 'Track_sentiment'],
    inplace=True,
)

# currently doesn't work with python 3.9

import swifter

# Same element-wise lowercase as the plain apply, routed through the swifter
# accessor; the result is displayed only, not stored.
itunes_df['Genre'].swifter.apply(lambda x: x.lower())

# Write the frame to disk without the index column.
itunes_df.to_csv('cleaned_itunes_data.csv', index=False)

# Shortest average track length per genre. 'Seconds' is selected *before*
# aggregating: calling mean() on the whole grouped frame also tries to average
# the text columns (e.g. 'Track'), which raises TypeError on pandas >= 2.0 and
# does needless work on older versions. The resulting values are the same.
itunes_df.groupby('Genre')['Seconds'].mean().sort_values().head()

# Load the raw price data and take a first look at it.
btc_df = pd.read_csv('bitcoin_price.csv')
btc_df.head()

# Inspect the distinct symbols, then discard the column.
btc_df['symbol'].unique()
btc_df.drop(columns='symbol', inplace=True)

# 'time' is converted with unit='ms', i.e. the raw values are treated as
# millisecond timestamps.
btc_df['time'] = pd.to_datetime(btc_df['time'], unit='ms')
btc_df['time'].dtype
btc_df.info()

# Index the frame by timestamp.
btc_df.set_index('time', inplace=True)
btc_df.head()

# Closing price on a logarithmic y axis.
btc_df.loc[:, ['close']].plot(logy=True)

# Create the figure explicitly so it can be styled before saving.
f = plt.figure(figsize=(5.5, 5.5))

# Draw the last 3000 rows of the closing price onto *this* figure's axes.
# Without ax=..., DataFrame.plot opens a brand-new figure, so the facecolor
# set on `f` below would never reach the image that gets saved.
btc_df.iloc[-3000:][['close']].plot(logy=True, ax=f.gca())

f.patch.set_facecolor('w')  # sets background color behind axis labels

f.tight_layout()  # auto-adjust margins

f.savefig('B17030_04_11.png', dpi=300)

# First attempt: let read_csv parse the 'time' index while loading.
# (infer_datetime_format was dropped: it is a deprecated keyword that only
# affected parsing speed, never the result.)
btc_df2 = pd.read_csv('bitcoin_price.csv', index_col='time', parse_dates=['time'])


def date_parser(x):
    """Convert millisecond timestamps *x* to pandas datetimes."""
    return pd.to_datetime(x, unit='ms')


# Second attempt: read the index as-is and convert it afterwards. This replaces
# read_csv's deprecated date_parser= keyword while still producing a
# datetime index named 'time'.
btc_df2 = pd.read_csv('bitcoin_price.csv', index_col='time')
btc_df2.index = date_parser(btc_df2.index)

btc_df2.head()

# Partial-string indexing on the datetime index: all rows from 2019.
btc_df.loc['2019']

Edward Lance Lorilla

【PYTHON】Bitcoin data analysis

No comments:

Power Is Shifting Rapidly to Indie Creators

Contact Form

Report Abuse