import pandas as pd
Lecture 4.1
Continuing Discussion on Pandas - Part 3
= "data/spotify-top-50-playlist-songs-anxods/data/spotify-streaming-top-50-world.csv"
WORLD_DATA_PATH = "data/spotify-top-50-playlist-songs-anxods/data/spotify-streaming-top-50-usa.csv" USA_DATA_PATH
= pd.read_csv(WORLD_DATA_PATH)
world_df = pd.read_csv(USA_DATA_PATH) usa_df
world_df.sample()
date | position | song | artist | popularity | duration_ms | album_type | total_tracks | release_date | is_explicit | album_cover_url | |
---|---|---|---|---|---|---|---|---|---|---|---|
263 | 2023-05-23 | 14 | El Azul | Junior H & Peso Pluma | 95 | 187225 | single | 1 | 2023-02-10 | False | https://i.scdn.co/image/ab67616d0000b27333ed35... |
1) usa_df.sample(
date | position | song | artist | popularity | duration_ms | album_type | total_tracks | release_date | is_explicit | album_cover_url | |
---|---|---|---|---|---|---|---|---|---|---|---|
254 | 2023-05-23 | 5 | Kill Bill | SZA | 94 | 153946 | album | 23 | 2022-12-08 | False | https://i.scdn.co/image/ab67616d0000b2730c471c... |
1. Revisiting groupby
source = “https://www.kaggle.com/code/alenavorushilova/grouping-sorting-and-filtering-data-tutorial”
"position") world_df.groupby(
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x117d3ae90>
Q:For each artist in the dataset, what was the longest song that made it to top 50?
1) world_df.sample(
date | position | song | artist | popularity | duration_ms | album_type | total_tracks | release_date | is_explicit | album_cover_url | |
---|---|---|---|---|---|---|---|---|---|---|---|
105 | 2023-05-20 | 6 | Daylight | David Kushner | 97 | 212953 | single | 1 | 2023-04-14 | False | https://i.scdn.co/image/ab67616d0000b27395ca6a... |
"artist") world_df.groupby(
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x117d3b880>
"artist")["duration_ms"].max().to_frame() world_df.groupby(
duration_ms | |
---|---|
artist | |
Alessandra | 147979 |
Arctic Monkeys | 183956 |
BTS | 229953 |
Bad Bunny | 231704 |
Beyoncé & Kendrick Lamar | 260962 |
... | ... |
Tyler, The Creator | 180386 |
Yahritza Y Su Esencia & Grupo Frontera | 160517 |
Yandel | 216148 |
Yng Lvcas & Peso Pluma | 234352 |
d4vd | 242484 |
75 rows × 1 columns
Q: For each position (1 to 50), which artist was in that rank the maximum number of days ( in total , needn’t be consecutive)
'position') world_df.groupby(
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x117f5c2b0>
'position')['artist'] world_df.groupby(
<pandas.core.groupby.generic.SeriesGroupBy object at 0x117f5c310>
'position')['artist'].value_counts().to_frame() world_df.groupby(
artist | ||
---|---|---|
position | artist | |
1 | Eslabon Armado | 31 |
Bad Bunny | 2 | |
Bizarrap & Peso Pluma | 2 | |
BTS | 1 | |
2 | Bizarrap & Peso Pluma | 16 |
... | ... | ... |
50 | Lil Durk | 1 |
Lil Mabu | 1 | |
Natanael Cano & Peso Pluma & Gabito Ballesteros | 1 | |
Sam Smith | 1 | |
Taylor Swift | 1 |
790 rows × 1 columns
2. Introducing MultiIndex in Pandas Dataframes
world_df.index
RangeIndex(start=0, stop=1800, step=1)
= world_df.groupby('position')['artist'].value_counts().to_frame()
world_df_with_rank_counts world_df_with_rank_counts.index
MultiIndex([( 1, 'Eslabon Armado'),
( 1, 'Bad Bunny'),
( 1, 'Bizarrap & Peso Pluma'),
( 1, 'BTS'),
( 2, 'Bizarrap & Peso Pluma'),
( 2, 'Bad Bunny'),
( 2, 'Eslabon Armado'),
( 2, 'Grupo Frontera & Bad Bunny'),
( 2, 'Yng Lvcas & Peso Pluma'),
( 3, 'Bad Bunny'),
...
(50, 'DENNIS & MC Kevin o Chris'),
(50, 'Dua Lipa'),
(50, 'Eden Muñoz & Junior H'),
(50, 'Eladio Carrion'),
(50, 'Kali Uchis'),
(50, 'Lil Durk'),
(50, 'Lil Mabu'),
(50, 'Natanael Cano & Peso Pluma & Gabito Ballesteros'),
(50, 'Sam Smith'),
(50, 'Taylor Swift')],
names=['position', 'artist'], length=790)
Approach 1
'position')#world_df_with_rank_counts.index.get_level_values(0) world_df_with_rank_counts.index.get_level_values(
Int64Index([ 1, 1, 1, 1, 2, 2, 2, 2, 2, 3,
...
50, 50, 50, 50, 50, 50, 50, 50, 50, 50],
dtype='int64', name='position', length=790)
'artist') #world_df_with_rank_counts.index.get_level_values(1) world_df_with_rank_counts.index.get_level_values(
Index(['Eslabon Armado', 'Bad Bunny', 'Bizarrap & Peso Pluma', 'BTS',
'Bizarrap & Peso Pluma', 'Bad Bunny', 'Eslabon Armado',
'Grupo Frontera & Bad Bunny', 'Yng Lvcas & Peso Pluma', 'Bad Bunny',
...
'DENNIS & MC Kevin o Chris', 'Dua Lipa', 'Eden Muñoz & Junior H',
'Eladio Carrion', 'Kali Uchis', 'Lil Durk', 'Lil Mabu',
'Natanael Cano & Peso Pluma & Gabito Ballesteros', 'Sam Smith',
'Taylor Swift'],
dtype='object', name='artist', length=790)
'rank'] = world_df_with_rank_counts.index.get_level_values('position')
world_df_with_rank_counts['artist(s)'] = world_df_with_rank_counts.index.get_level_values('artist')
world_df_with_rank_counts[2) world_df_with_rank_counts.sample(
artist | rank | artist(s) | ||
---|---|---|---|---|
position | artist | |||
39 | Taylor Swift | 1 | 39 | Taylor Swift |
46 | Dua Lipa | 1 | 46 | Dua Lipa |
= True, inplace = True) world_df_with_rank_counts.reset_index(drop
world_df_with_rank_counts
artist | rank | artist(s) | |
---|---|---|---|
0 | 31 | 1 | Eslabon Armado |
1 | 2 | 1 | Bad Bunny |
2 | 2 | 1 | Bizarrap & Peso Pluma |
3 | 1 | 1 | BTS |
4 | 16 | 2 | Bizarrap & Peso Pluma |
... | ... | ... | ... |
785 | 1 | 50 | Lil Durk |
786 | 1 | 50 | Lil Mabu |
787 | 1 | 50 | Natanael Cano & Peso Pluma & Gabito Ballesteros |
788 | 1 | 50 | Sam Smith |
789 | 1 | 50 | Taylor Swift |
790 rows × 3 columns
'artist':'count'},axis = 'columns',inplace=True) world_df_with_rank_counts.rename({
world_df_with_rank_counts
count | rank | artist(s) | |
---|---|---|---|
0 | 31 | 1 | Eslabon Armado |
1 | 2 | 1 | Bad Bunny |
2 | 2 | 1 | Bizarrap & Peso Pluma |
3 | 1 | 1 | BTS |
4 | 16 | 2 | Bizarrap & Peso Pluma |
... | ... | ... | ... |
785 | 1 | 50 | Lil Durk |
786 | 1 | 50 | Lil Mabu |
787 | 1 | 50 | Natanael Cano & Peso Pluma & Gabito Ballesteros |
788 | 1 | 50 | Sam Smith |
789 | 1 | 50 | Taylor Swift |
790 rows × 3 columns
Approach 2
= world_df.groupby('position')['artist'].value_counts().to_frame()
world_df_with_rank_counts 2) world_df_with_rank_counts.sample(
artist | ||
---|---|---|
position | artist | |
26 | Miley Cyrus | 1 |
33 | Tina Turner | 1 |
'artist':'count'},axis = 'columns', inplace = True) world_df_with_rank_counts.rename({
=True) world_df_with_rank_counts.reset_index(inplace
world_df_with_rank_counts
position | artist | count | |
---|---|---|---|
0 | 1 | Eslabon Armado | 31 |
1 | 1 | Bad Bunny | 2 |
2 | 1 | Bizarrap & Peso Pluma | 2 |
3 | 1 | BTS | 1 |
4 | 2 | Bizarrap & Peso Pluma | 16 |
... | ... | ... | ... |
785 | 50 | Lil Durk | 1 |
786 | 50 | Lil Mabu | 1 |
787 | 50 | Natanael Cano & Peso Pluma & Gabito Ballesteros | 1 |
788 | 50 | Sam Smith | 1 |
789 | 50 | Taylor Swift | 1 |
790 rows × 3 columns
'position'] ==1] world_df_with_rank_counts[world_df_with_rank_counts[
position | artist | count | |
---|---|---|---|
0 | 1 | Eslabon Armado | 31 |
1 | 1 | Bad Bunny | 2 |
2 | 1 | Bizarrap & Peso Pluma | 2 |
3 | 1 | BTS | 1 |
= world_df_with_rank_counts[world_df_with_rank_counts['position'] ==1]['count'].idxmax() # new : idxmax() max_count_index
#Revisiting .loc world_df_with_rank_counts.loc[max_count_index].to_frame().T
position | artist | count | |
---|---|---|---|
0 | 1 | Eslabon Armado | 31 |
# for easier view : **Q: For each position (1 to 50), which artist was in that rank the maximum number of days** ( in total , needn't be consecutive)
= []
list_of_dataframes for position in range(1,51):
= world_df_with_rank_counts[world_df_with_rank_counts['position'] ==position]['count'].idxmax()
max_count_index list_of_dataframes.append(world_df_with_rank_counts.loc[max_count_index].to_frame().T )
10] list_of_dataframes[
position | artist | count | |
---|---|---|---|
69 | 11 | SZA | 9 |
pd.concat(list of dataframes)
= pd.concat(list_of_dataframes)
merged_df merged_df
position | artist | count | |
---|---|---|---|
0 | 1 | Eslabon Armado | 31 |
4 | 2 | Bizarrap & Peso Pluma | 16 |
9 | 3 | Bad Bunny | 18 |
14 | 4 | Yng Lvcas & Peso Pluma | 13 |
20 | 5 | Grupo Frontera & Bad Bunny | 11 |
26 | 6 | Miley Cyrus | 19 |
33 | 7 | David Kushner | 10 |
41 | 8 | SZA | 9 |
50 | 9 | Harry Styles | 14 |
59 | 10 | David Kushner | 8 |
69 | 11 | SZA | 9 |
79 | 12 | Fuerza Regida | 7 |
92 | 13 | Feid & Young Miko | 11 |
105 | 14 | Metro Boomin | 12 |
116 | 15 | Metro Boomin | 5 |
130 | 16 | Metro Boomin | 8 |
144 | 17 | Rema | 6 |
163 | 18 | Rema | 5 |
180 | 19 | Metro Boomin | 7 |
196 | 20 | Metro Boomin | 8 |
213 | 21 | Junior H & Peso Pluma | 4 |
232 | 22 | PinkPantheress & Ice Spice | 7 |
250 | 23 | Metro Boomin | 5 |
267 | 24 | Lana Del Rey | 4 |
283 | 25 | The Weeknd | 6 |
303 | 26 | Morgan Wallen | 6 |
323 | 27 | Morgan Wallen | 5 |
341 | 28 | Taylor Swift | 8 |
355 | 29 | Arctic Monkeys | 5 |
373 | 30 | The Weeknd | 10 |
388 | 31 | The Weeknd | 6 |
406 | 32 | Taylor Swift | 5 |
425 | 33 | d4vd | 4 |
446 | 34 | Metro Boomin | 4 |
465 | 35 | The Weeknd | 6 |
485 | 36 | The Weeknd | 6 |
505 | 37 | Yandel | 4 |
529 | 38 | The Weeknd | 7 |
548 | 39 | The Weeknd | 7 |
566 | 40 | Tom Odell | 5 |
585 | 41 | JVKE | 5 |
602 | 42 | Libianca | 6 |
623 | 43 | OneRepublic | 5 |
643 | 44 | Libianca | 6 |
663 | 45 | Yahritza Y Su Esencia & Grupo Frontera | 4 |
687 | 46 | Manuel Turizo | 4 |
710 | 47 | Manuel Turizo | 4 |
730 | 48 | Sam Smith | 4 |
747 | 49 | Taylor Swift | 4 |
769 | 50 | Peso Pluma | 4 |
set_index()
"position",drop = True,inplace = True) merged_df.set_index(
del merged_df['count']
merged_df
artist | |
---|---|
position | |
1 | Eslabon Armado |
2 | Bizarrap & Peso Pluma |
3 | Bad Bunny |
4 | Yng Lvcas & Peso Pluma |
5 | Grupo Frontera & Bad Bunny |
6 | Miley Cyrus |
7 | David Kushner |
8 | SZA |
9 | Harry Styles |
10 | David Kushner |
11 | SZA |
12 | Fuerza Regida |
13 | Feid & Young Miko |
14 | Metro Boomin |
15 | Metro Boomin |
16 | Metro Boomin |
17 | Rema |
18 | Rema |
19 | Metro Boomin |
20 | Metro Boomin |
21 | Junior H & Peso Pluma |
22 | PinkPantheress & Ice Spice |
23 | Metro Boomin |
24 | Lana Del Rey |
25 | The Weeknd |
26 | Morgan Wallen |
27 | Morgan Wallen |
28 | Taylor Swift |
29 | Arctic Monkeys |
30 | The Weeknd |
31 | The Weeknd |
32 | Taylor Swift |
33 | d4vd |
34 | Metro Boomin |
35 | The Weeknd |
36 | The Weeknd |
37 | Yandel |
38 | The Weeknd |
39 | The Weeknd |
40 | Tom Odell |
41 | JVKE |
42 | Libianca |
43 | OneRepublic |
44 | Libianca |
45 | Yahritza Y Su Esencia & Grupo Frontera |
46 | Manuel Turizo |
47 | Manuel Turizo |
48 | Sam Smith |
49 | Taylor Swift |
50 | Peso Pluma |
Q. How many times has an artist taken a spot between 1 and 50 ?
For each artist, how many times have they taken a spot between 1 and 50 (both inclusive)
world_df.sample()
date | position | song | artist | popularity | duration_ms | album_type | total_tracks | release_date | is_explicit | album_cover_url | |
---|---|---|---|---|---|---|---|---|---|---|---|
1458 | 2023-06-16 | 9 | As It Was | Harry Styles | 93 | 167303 | album | 13 | 2022-05-20 | False | https://i.scdn.co/image/ab67616d0000b2732e8ed7... |
= world_df.groupby('artist')['position'].value_counts().to_frame()
rank_count_df rank_count_df
position | ||
---|---|---|
artist | position | |
Alessandra | 42 | 1 |
47 | 1 | |
Arctic Monkeys | 29 | 5 |
25 | 4 | |
22 | 3 | |
... | ... | ... |
d4vd | 39 | 1 |
40 | 1 | |
41 | 1 | |
42 | 1 | |
43 | 1 |
790 rows × 1 columns
'position':'number of times'}, axis = 'columns',inplace = True) rank_count_df.rename({
= True) rank_count_df.reset_index(inplace
rank_count_df
artist | position | number of times | |
---|---|---|---|
0 | Alessandra | 42 | 1 |
1 | Alessandra | 47 | 1 |
2 | Arctic Monkeys | 29 | 5 |
3 | Arctic Monkeys | 25 | 4 |
4 | Arctic Monkeys | 22 | 3 |
... | ... | ... | ... |
785 | d4vd | 39 | 1 |
786 | d4vd | 40 | 1 |
787 | d4vd | 41 | 1 |
788 | d4vd | 42 | 1 |
789 | d4vd | 43 | 1 |
790 rows × 3 columns
= rank_count_df.groupby('artist')['number of times'].sum().to_frame() top_50_count_for_each_artist
= 'number of times', ascending=False) top_50_count_for_each_artist.sort_values(by
number of times | |
---|---|
artist | |
The Weeknd | 108 |
Taylor Swift | 92 |
Metro Boomin | 91 |
Miley Cyrus | 72 |
ROSALÍA & Rauw Alejandro | 36 |
... | ... |
JISOO | 1 |
Stray Kids | 1 |
Fast & Furious: The Fast Saga | 1 |
Saiko & Feid & Quevedo | 1 |
Eladio Carrion | 1 |
75 rows × 1 columns
3. Joins
(discussed in Lecture 3.3)
= world_df[['date','position','artist','song']]
world_info 3) world_info.sample(
date | position | artist | song | |
---|---|---|---|---|
1037 | 2023-06-07 | 38 | The Weeknd | Die For You |
557 | 2023-05-29 | 8 | SZA | Kill Bill |
975 | 2023-06-06 | 26 | Fast & Furious: The Fast Saga & Jimin & BTS | Angel Pt. 1 (feat. Jimin of BTS, JVKE & Muni L... |
world_info.shape
(1800, 4)
'position'] = world_df['position'].astype('int') world_df[
= usa_df[['date','position','artist','song']]
usa_info 3) usa_info.sample(
date | position | artist | song | |
---|---|---|---|---|
1763 | 2023-06-22 | 14 | Metro Boomin | Annihilate (Spider-Man: Across the Spider-Vers... |
1785 | 2023-06-22 | 36 | Metro Boomin | Creepin' (with The Weeknd & 21 Savage) |
120 | 2023-05-20 | 21 | Peso Pluma | Por las Noches |
usa_info.shape
(1800, 4)
'position'] = usa_info['position'].astype('int') usa_info[
/var/folders/r0/bggl6hf15j708chpxqrx5tp00000gn/T/ipykernel_2958/3011614831.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
usa_info['position'] = usa_info['position'].astype('int')
=usa_info, on= ['date','artist','song'], how='inner') #default is inner world_info.merge(right
date | position_x | artist | song | position_y | |
---|---|---|---|---|---|
0 | 2023-05-18 | 1 | Eslabon Armado | Ella Baila Sola | 1 |
1 | 2023-05-18 | 2 | Grupo Frontera & Bad Bunny | un x100to | 4 |
2 | 2023-05-18 | 3 | Yng Lvcas & Peso Pluma | La Bebe - Remix | 8 |
3 | 2023-05-18 | 4 | FIFTY FIFTY | Cupid - Twin Ver. | 7 |
4 | 2023-05-18 | 5 | Miley Cyrus | Flowers | 14 |
... | ... | ... | ... | ... | ... |
983 | 2023-06-22 | 37 | Junior H & Peso Pluma | El Azul | 39 |
984 | 2023-06-22 | 43 | Kali Uchis | Moonlight | 40 |
985 | 2023-06-22 | 44 | PinkPantheress & Ice Spice | Boy's a Liar Pt. 2 | 18 |
986 | 2023-06-22 | 48 | Peso Pluma & Natanael Cano | PRC | 37 |
987 | 2023-06-22 | 50 | Lil Durk | All My Life (feat. J. Cole) | 8 |
988 rows × 5 columns
3.1 Inner Join
=usa_info, on= ['date','artist','song'], how='inner', suffixes=['_world','_usa']) #default is inner world_info.merge(right
date | position_world | artist | song | position_usa | |
---|---|---|---|---|---|
0 | 2023-05-18 | 1 | Eslabon Armado | Ella Baila Sola | 1 |
1 | 2023-05-18 | 2 | Grupo Frontera & Bad Bunny | un x100to | 4 |
2 | 2023-05-18 | 3 | Yng Lvcas & Peso Pluma | La Bebe - Remix | 8 |
3 | 2023-05-18 | 4 | FIFTY FIFTY | Cupid - Twin Ver. | 7 |
4 | 2023-05-18 | 5 | Miley Cyrus | Flowers | 14 |
... | ... | ... | ... | ... | ... |
983 | 2023-06-22 | 37 | Junior H & Peso Pluma | El Azul | 39 |
984 | 2023-06-22 | 43 | Kali Uchis | Moonlight | 40 |
985 | 2023-06-22 | 44 | PinkPantheress & Ice Spice | Boy's a Liar Pt. 2 | 18 |
986 | 2023-06-22 | 48 | Peso Pluma & Natanael Cano | PRC | 37 |
987 | 2023-06-22 | 50 | Lil Durk | All My Life (feat. J. Cole) | 8 |
988 rows × 5 columns
On a given date, a particular artist was on BOTH global and US spotify top 50. position_world gives their global spotify position, and position_usa gives their spotify top 50 in US position
3.2 Outer Join
=usa_info, on= ['date','artist','song'], how='outer', suffixes=['_world','_usa']) #default is inner world_info.merge(right
date | position_world | artist | song | position_usa | |
---|---|---|---|---|---|
0 | 2023-05-18 | 1.0 | Eslabon Armado | Ella Baila Sola | 1.0 |
1 | 2023-05-18 | 2.0 | Grupo Frontera & Bad Bunny | un x100to | 4.0 |
2 | 2023-05-18 | 3.0 | Yng Lvcas & Peso Pluma | La Bebe - Remix | 8.0 |
3 | 2023-05-18 | 4.0 | FIFTY FIFTY | Cupid - Twin Ver. | 7.0 |
4 | 2023-05-18 | 5.0 | Miley Cyrus | Flowers | 14.0 |
... | ... | ... | ... | ... | ... |
2607 | 2023-06-22 | NaN | Peso Pluma | Por las Noches | 45.0 |
2608 | 2023-06-22 | NaN | NLE Choppa | SLUT ME OUT | 47.0 |
2609 | 2023-06-22 | NaN | Drake | Jimmy Cooks (feat. 21 Savage) | 48.0 |
2610 | 2023-06-22 | NaN | Kendrick Lamar | Money Trees | 49.0 |
2611 | 2023-06-22 | NaN | SZA | Snooze | 50.0 |
2612 rows × 5 columns
On a given date, a particular artist was on EITHER global or US spotify top 50. position_world gives their global spotify position, and position_usa gives their spotify top 50 in US position
3.3 Left Join
=usa_info, on= ['date','artist','song'], how='left', suffixes=['_world','_usa']) #default is inner world_info.merge(right
date | position_world | artist | song | position_usa | |
---|---|---|---|---|---|
0 | 2023-05-18 | 1 | Eslabon Armado | Ella Baila Sola | 1.0 |
1 | 2023-05-18 | 2 | Grupo Frontera & Bad Bunny | un x100to | 4.0 |
2 | 2023-05-18 | 3 | Yng Lvcas & Peso Pluma | La Bebe - Remix | 8.0 |
3 | 2023-05-18 | 4 | FIFTY FIFTY | Cupid - Twin Ver. | 7.0 |
4 | 2023-05-18 | 5 | Miley Cyrus | Flowers | 14.0 |
... | ... | ... | ... | ... | ... |
1795 | 2023-06-22 | 46 | Glass Animals | Heat Waves | NaN |
1796 | 2023-06-22 | 47 | Yahritza Y Su Esencia & Grupo Frontera | Frágil | NaN |
1797 | 2023-06-22 | 48 | Peso Pluma & Natanael Cano | PRC | 37.0 |
1798 | 2023-06-22 | 49 | Dua Lipa | Dance The Night (From Barbie The Album) | NaN |
1799 | 2023-06-22 | 50 | Lil Durk | All My Life (feat. J. Cole) | 8.0 |
1800 rows × 5 columns
On a given date, a particular artist was on global spotify top 50 list. If the same artist for the same song, on the same date, was also on USA top 50, then their rank is given in position_usa. If they were not in USA top 50, position_usa would be NaN( Not a Number)
3.4 Right Join
=usa_info, on= ['date','artist','song'], how='right', suffixes=['_world','_usa']) #default is inner world_info.merge(right
date | position_world | artist | song | position_usa | |
---|---|---|---|---|---|
0 | 2023-05-18 | 1.0 | Eslabon Armado | Ella Baila Sola | 1 |
1 | 2023-05-18 | 27.0 | Morgan Wallen | Last Night | 2 |
2 | 2023-05-18 | 26.0 | Lil Durk & J. Cole | All My Life (feat. J. Cole) | 3 |
3 | 2023-05-18 | 2.0 | Grupo Frontera & Bad Bunny | un x100to | 4 |
4 | 2023-05-18 | 7.0 | SZA | Kill Bill | 5 |
... | ... | ... | ... | ... | ... |
1795 | 2023-06-22 | 31.0 | The Weeknd | Die For You | 46 |
1796 | 2023-06-22 | NaN | NLE Choppa | SLUT ME OUT | 47 |
1797 | 2023-06-22 | NaN | Drake | Jimmy Cooks (feat. 21 Savage) | 48 |
1798 | 2023-06-22 | NaN | Kendrick Lamar | Money Trees | 49 |
1799 | 2023-06-22 | NaN | SZA | Snooze | 50 |
1800 rows × 5 columns
On a given date, a particular artist was on usa spotify top 50 list. If the same artist for the same song, on the same date, was also on Global top 50, then their rank is given in position_world. If they were not in Global top 50, position_world would be NaN( Not a Number)
3.5 Cross ie Cartisean Product
= pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
left left
col1 | col2 | |
---|---|---|
0 | 1 | 3 |
1 | 2 | 4 |
= pd.DataFrame({'col3': [5, 6]})
right right
col3 | |
---|---|
0 | 5 |
1 | 6 |
="cross") left.merge(right,how
col1 | col2 | col3 | |
---|---|---|---|
0 | 1 | 3 | 5 |
1 | 1 | 3 | 6 |
2 | 2 | 4 | 5 |
3 | 2 | 4 | 6 |