导入数据

import numpy as np
import pandas as pd
from pandas import DataFrame, Series

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ['SimHei'] # use the SimHei font so Chinese text in plots is displayed
plt.rcParams['axes.unicode_minus'] = False # keep the minus sign displaying correctly with this font

import seaborn as sns
sns.set(color_codes=True)

import json
import warnings
# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')
from wordcloud import WordCloud, STOPWORDS

# Directory holding the two TMDB 5000 CSV files. This is the original author's
# machine-specific location, kept verbatim -- point it at your own copy.
# NOTE(review): the folder names may have been hit by the same text corruption
# that affected the file names below; confirm against the real path.
data_dir = 'C:\\Users\\杜子轩\\Desktop\\王修官做业\\年夜数据比赛实习题\\MathorCup年夜数据比赛实习题一\\data'

# Load the movie metadata and the cast/crew table.
# The file names and codec must use ASCII digits: 'utf_8' is a registered
# codec alias, whereas a CJK numeral in its place makes read_csv raise
# LookupError (unknown encoding).
movies = pd.read_csv(data_dir + '\\tmdb_5000_movies.csv', encoding='utf_8')
credits = pd.read_csv(data_dir + '\\tmdb_5000_credits.csv', encoding='utf_8')

# Print a summary of each frame to check what was loaded.
movies.info()
credits.info()

 

# Both frames have a `title` column, and movies also has `original_title`.
# These three columns duplicate each other, so drop two of them and keep
# only movies['title'].
del credits['title']
del movies['original_title']

# Join the two tables: keep every row of movies and attach the credits row
# whose movie_id equals the movie's id.
merged = pd.merge(movies, credits, left_on='id', right_on='movie_id', how='left')

# Drop the columns that are not needed for the analysis. `movie_id` is
# redundant with `id` after the merge. axis=1 selects columns; it must be the
# integer literal 1 (a CJK numeral here is parsed as an undefined name and
# raises NameError).
df = merged.drop(
    ['homepage', 'overview', 'spoken_languages', 'status', 'tagline', 'movie_id'],
    axis=1,
)
df.info()

 

 

更多文章请关注《万象专栏》