파이썬을 이용한 빅데이터 수집, 분석과 시각화 Part 2. 데이터 시각화 이원하
목 차 1 2 3 4 WordCloud 자연어처리 Matplotlib 그래프 Folium 지도시각화 Seaborn - Heatmap 03 07 16 21
1 WORDCLOUD - 자연어처리
KoNLPy 형태소 기반 자연어처리 http://www.oracle.com/technetwork/java/javase/downloads/index.html http://www.lfd.uci.edu/~gohlke/pythonlibs/#jpype >>> from konlpy.tag import Kkma >>> from konlpy.utils import pprint >>> kkma = Kkma() >>> pprint(kkma.nouns(u'명사만을 추출하여 워드클라우드를 그려봅니다')) ['명사', '추출', '워드', '워드클라우드', '클라우드'] 4
KoNLPy 내부객체 5
KoNLPy & pytagcloud >>> from collections import Counter >>> color = ['red', 'blue', 'red', 'red'] >>> counter_color = Counter(color) >>> print(counter_color) Counter({'red': 3, 'blue': 1}) 6 jtbcnews_facebook_2017-01-01_2017-06-30.txt >>> from konlpy.tag import Twitter >>> from collections import Counter >>> import pytagcloud >>> import webbrowser >>> import re >>> openfilename = "c:/python_sample/jtbcnews_facebook_2017-01-01_2017-06-30.txt" >>> cloudimagepath = openfilename + ".jpg" >>> rfile = open(openfilename, 'r', encoding='utf-8').read() >>> data = re.sub(r'[^\w]', ' ', rfile) >>> nlp = Twitter() >>> nouns = nlp.nouns(data) >>> count = Counter(nouns) >>> wordinfo = dict() >>> for tags, counts in count.most_common(50): if (len(str(tags)) > 1): wordinfo[tags] = counts print ("%s : %d" % (tags, counts)) >>> taglist = pytagcloud.make_tags(dict(wordinfo).items(), maxsize=80) >>> pytagcloud.create_tag_image(taglist, cloudimagepath, size=(640, 480), fontname='korean', rectangular=False) >>> webbrowser.open(cloudimagepath)
2 MATPLOTLIB 그래프
Matplotlib 그래프 Package [ 파이썬설치경로 ]>pip install matplotlib >>> from matplotlib import pyplot >>> pyplot.plot([1,2,3,4],[10,30,20,40]) >>> pyplot.show() 8
Matplotlib 그래프 Package File "C:\python\lib\site-packages\matplotlib\font_manager.py", line 1412, in <module> fontManager = pickle_load(_fmcache) def win32InstalledFonts(directory=None, fontext='ttf'): 중략 key, direc, any = winreg.EnumValue(local, j) if not is_string_like(direc): continue if not os.path.dirname(direc): direc = os.path.join(directory, direc) direc = direc.split('\0', 1)[0] # 파이썬경로문제해결 direc = os.path.abspath(direc).lower() 9
Matplotlib 그래프 Package >>> import matplotlib.pyplot as plt >>> plt.plot([1,2,3,4]) >>> plt.xlabel('x-axis label') #x 축라벨타이틀설정 >>> plt.ylabel('y-axis label') #y 축라벨타이틀설정 >>> plt.show() >>> plt.plot([1,2,3,4],[1,2,3,4]) >>> plt.show() 10
Matplotlib 그래프 Package >>> plt.plot([1,2,3,4], [1,2,3,4], 'ro') >>> plt.show() 11
Matplotlib 그래프 Package >>> plt.plot([1,2,3,4],[1,2,3,4],'r-', [1,2,3,4],[3,4,5,6],'v-') >>> plt.show() >>> from matplotlib import font_manager, rc >>> import matplotlib >>> font_location = "c:/windows/fonts/malgun.ttf" >>> font_name = font_manager.FontProperties(fname=font_location).get_name() >>> matplotlib.rc('font', family=font_name) >>> plt.plot([1,2,3,4]) >>> plt.xlabel('x축한글표시 ') >>> plt.show() 12
Matplotlib 그래프 Package >>> plt.figure() >>> plt.subplot(1, 2, 1) >>> plt.plot([1,2,3,4], [1,2,3,4]) >>> plt.subplot(1, 2, 2) >>> plt.plot([5,6,7,8],[5,6,7,8]) >>> plt.show() 13
Matplotlib 그래프 Package >>> plt.plot([1,2,3,4], [1,2,3,4]) >>> plt.xlabel('x축 ') >>> plt.ylabel('y축 ') >>> plt.title('matplotlib 활용 ') >>> plt.text(3.5, 3.0, ' 평균 :2.5') >>> plt.grid(True) >>> plt.show() 14
Matplotlib 그래프 Package jtbcnews_facebook_2017-01-01_2017-06-30.txt >>> from konlpy.tag import Twitter >>> from collections import Counter >>> import pytagcloud >>> import webbrowser >>> import re >>> openfilename = "c:/python_sample/jtbcnews_facebook_2017-01-01_2017-06-30.txt" >>> cloudimagepath = openfilename + ".jpg" >>> rfile = open(openfilename, 'r', encoding='utf-8').read() >>> data = re.sub(r'[^\w]', ' ', rfile) >>> nlp = Twitter() >>> nouns = nlp.nouns(data) >>> count = Counter(nouns) >>> wordinfo = dict() >>> for tags, counts in count.most_common(50): if (len(str(tags)) > 1): wordinfo[tags] = counts print ("%s : %d" % (tags, counts)) >>> import matplotlib.pyplot as plt >>> from matplotlib import font_manager, rc >>> import matplotlib >>> font_location = "c:/windows/fonts/malgun.ttf" >>> font_name = font_manager.FontProperties(fname=font_location).get_name() >>> matplotlib.rc('font', family=font_name) >>> plt.xlabel(' 주요단어 ') >>> plt.ylabel(' 빈도수 ') >>> plt.grid(True) >>> Sorted_Dict_Values = sorted(wordinfo.values(), reverse=True) >>> Sorted_Dict_Keys = sorted(wordinfo, key=wordinfo.get, reverse=True) >>> plt.bar(range(len(wordinfo)), Sorted_Dict_Values, align='center') >>> plt.xticks(range(len(wordinfo)), list(Sorted_Dict_Keys), rotation='70') >>> plt.show() 15
3 FOLIUM 지도시각화
Folium 지도시각화 Package [ 파이썬설치경로 ]>pip install folium >>> import folium >>> map_osm = folium.Map(location=[37.566345, 126.977893]) >>> map_osm.save('c:/python_sample/map1.html') 17
Folium 지도시각화 Package >>> import folium >>> map_osm = folium.Map(location=[37.566345, 126.977893], zoom_start=17) >>> map_osm.save('c:/python_sample/map2.html') >>> import folium >>> map_osm = folium.Map(location=[37.566345, 126.977893], zoom_start=17, tiles='Stamen Terrain') >>> map_osm.save('c:/python_sample/map3.html') >>> map_osm = folium.Map(location=[37.566345, 126.977893], zoom_start=17, tiles='Stamen Toner') >>> map_osm.save('c:/python_sample/map4.html') 18
Folium 지도시각화 Package >>> map_osm = folium.Map(location=[37.566345, 126.977893], zoom_start=17) >>> folium.Marker([37.566345, 126.977893], popup=' 서울특별시청 ').add_to(map_osm) >>> folium.Marker([37.5658859, 126.9754788], popup=' 덕수궁 ').add_to(map_osm) >>> map_osm.save('c:/python_sample/map5.html')
"""Folium map visualization: geocode an address and show it on a map.

Original slide title: "Folium 지도시각화 Package".

The script asks for an address, resolves it to coordinates through the
Naver geocode Open API, places a marker at that position on a folium map,
saves the map as an HTML file and opens it in the default web browser.
"""
import json
import urllib.parse
import urllib.request
import webbrowser


def get_request_url(url):
    """Fetch *url* with the Naver Open API credential headers attached.

    Returns the response body decoded as UTF-8 when the server answers with
    status 200. Returns ``None`` for any other status, or when the request
    raises; in the latter case the exception is printed first.
    """
    # NOTE(review): the API credentials are hard-coded in the listing.
    # Consider loading them from configuration or the environment instead
    # of keeping them in source.
    client_id = "PIKKM1p_iFKvlrgZdyk3"
    client_secret = "scxkesjyib"

    req = urllib.request.Request(url)
    req.add_header("x-naver-client-id", client_id)
    req.add_header("x-naver-client-secret", client_secret)
    try:
        # The context manager closes the HTTP response on every path.
        with urllib.request.urlopen(req) as response:
            if response.getcode() == 200:
                return response.read().decode('utf-8')
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller treat it as "no data".
        print(e)
    return None


def getgeodata(address):
    """Query the geocode endpoint for *address*.

    Returns the parsed JSON response, or ``None`` when the request did not
    yield a body.
    """
    base = "https://openapi.naver.com/v1/map/geocode"
    parameters = "?query=%s" % urllib.parse.quote(address)
    url = base + parameters

    retdata = get_request_url(url)
    if retdata is None:
        return None
    return json.loads(retdata)


def main():
    """Prompt for an address and open a folium map centred on it."""
    # Third-party dependency, imported here so the geocoding helpers above
    # can be imported without folium being installed.
    import folium

    address = input(" 주소를입력하세요 : ")
    jsonresult = getgeodata(address)

    try:
        point = jsonresult['result']['items'][0]['point']
        latitude, longitude = point['y'], point['x']
    except (TypeError, KeyError, IndexError):
        # Covers a failed request (None), a response without 'result',
        # and an empty 'items' list.
        print(" 주소검색결과가없습니다 ")
        return

    map_osm = folium.Map(location=[latitude, longitude], zoom_start=17)
    folium.Marker(location=[latitude, longitude],
                  popup=address).add_to(map_osm)
    map_osm.save('c:/python_sample/address.html')
    webbrowser.open('c:/python_sample/address.html')


if __name__ == '__main__':
    main()