R_Graphics_cookbook_in_sage

1500 days ago by takepwave

RグラフィックスクックブックをSageで試してみる

『Rグラフィックスクックブック ―ggplot2によるグラフ作成のレシピ集』に載っている例題をPython版ggplotで試し、Python版ggplotでサポートされていない部分はRのggplot2をSageから操作してプロットしてみました。

Sageでデータをプロットするときに参考にしてください。

# Rの必要なライブラリ #r("install.packages('ggplot2')") r('library(ggplot2)') #r("install.packages('gcookbook')") r('library(gcookbook)') # RUtilでjsonliteを使用するため、未インストールならインストールが必要 #r("install.packages('jsonlite')") # Pythonパッケージのインポート import pandas as pd import numpy as np from ggplot import * # RUtilにRとPandasのデータフレームを相互に変換する関数を追加 load(DATA + 'RUtil.py') 
       

連続値をカテゴリに変換する

Pandasを使って連続値を区間で区切って処理する方法の紹介です。知っていると便利です。

age = np.array([20, 22, 25, 27, 21, 23, 37, 31, 61, 45, 41, 32]) sex = np.array(['F', 'M', 'M', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M']) df = pd.DataFrame({'age': age, 'sex': sex}) df 
       
    age sex
0    20   F
1    22   M
2    25   M
3    27   M
4    21   F
5    23   M
6    37   F
7    31   M
8    61   F
9    45   M
10   41   F
11   32   M
    age sex
0    20   F
1    22   M
2    25   M
3    27   M
4    21   F
5    23   M
6    37   F
7    31   M
8    61   F
9    45   M
10   41   F
11   32   M
# カテゴリ分けする区切り値 bins = [18, 25, 35, 60, 100] cat_names = ['youth', 'YoungAdult', 'MiddleAged', 'Senior'] df['bins'] = pd.cut(df.age, bins, labels=cat_names) df.head() 
       
   age sex        bins
0   20   F       youth
1   22   M       youth
2   25   M       youth
3   27   M  YoungAdult
4   21   F       youth
   age sex        bins
0   20   F       youth
1   22   M       youth
2   25   M       youth
3   27   M  YoungAdult
4   21   F       youth

ここからプロット例

# gcookbookのサンプルデータからheightweightを取得する heightweight = RDf2PandaDf("heightweight") heightweight.head() 
       
   ageMonth  ageYear  heightIn sex  weightLb
0       143    11.92      56.3   f      85.0
1       155    12.92      62.3   f     105.0
2       153    12.75      63.3   f     108.0
3       161    13.42      59.0   f      92.0
4       191    15.92      62.5   f     112.5
   ageMonth  ageYear  heightIn sex  weightLb
0       143    11.92      56.3   f      85.0
1       155    12.92      62.3   f     105.0
2       153    12.75      63.3   f     108.0
3       161    13.42      59.0   f      92.0
4       191    15.92      62.5   f     112.5
heightweight.tail() 
       
     ageMonth  ageYear  heightIn sex  weightLb
231       164    13.67      66.5   m     112.0
232       189    15.75      65.0   m     114.0
233       164    13.67      61.5   m     140.0
234       167    13.92      62.0   m     107.5
235       151    12.58      59.3   m      87.0
     ageMonth  ageYear  heightIn sex  weightLb
231       164    13.67      66.5   m     112.0
232       189    15.75      65.0   m     114.0
233       164    13.67      61.5   m     140.0
234       167    13.92      62.0   m     107.5
235       151    12.58      59.3   m      87.0
# Rec.2.1 散布図を作成する ggplot(mtcars, aes(x='wt', y='mpg')) + geom_point() 
       
<ggplot: (8769175188637)>
<ggplot: (8769175188637)>
ggsave('Rec.2.1.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.2.2 折れ線グラフを作成する # 単純なデータなら以下の様にRから持ってくることもできる pressure = pd.DataFrame(sageobj(r('pressure'))['DATA']) pressure.head() 
       
   pressure  temperature
0    0.0002            0
1    0.0012           20
2    0.0060           40
3    0.0300           60
4    0.0900           80
   pressure  temperature
0    0.0002            0
1    0.0012           20
2    0.0060           40
3    0.0300           60
4    0.0900           80
ggplot(pressure, aes(x='temperature', y='pressure')) +geom_line() + geom_point() 
       
<ggplot: (8769175112865)>
<ggplot: (8769175112865)>
ggsave('Rec.2.2.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.2.3 棒グラフを作成する # cylは連続値なので、factorで離散として扱う # factorが上手く機能していない ggplot(mtcars, aes(x='factor(cyl)')) +geom_bar() 
       
Traceback (click to the left of this block for traceback)
...
AttributeError: 'EvalEnvironment' object has no attribute
'add_outer_namespace'
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "_sage_input_25.py", line 10, in <module>
    exec compile(u'open("___code___.py","w").write("# -*- coding: utf-8 -*-\\n" + _support_.preparse_worksheet_cell(base64.b64decode("IyBSZWMuMi4zIOajkuOCsOODqeODleOCkuS9nOaIkOOBmeOCiwojIGN5bOOBr+mAo+e2muWApOOBquOBruOBp+OAgWZhY3RvcuOBp+mbouaVo+OBqOOBl+OBpuaJseOBhgojIGZhY3RvcuOBjOS4iuaJi+OBj+apn+iDveOBl+OBpuOBhOOBquOBhApnZ3Bsb3QobXRjYXJzLCBhZXMoeD0nZmFjdG9yKGN5bCknKSkgK2dlb21fYmFyKCk="),globals())+"\\n"); execfile(os.path.abspath("___code___.py"))
  File "", line 1, in <module>
    
  File "/tmp/tmpVEH1Fr/___code___.py", line 5, in <module>
    exec compile(u"ggplot(mtcars, aes(x='factor(cyl)')) +geom_bar()" + '\n', '', 'single')
  File "", line 1, in <module>
    
  File "/usr/local/sage-6.7/local/lib/python2.7/site-packages/ggplot-0.6.5-py2.7.egg/ggplot/ggplot.py", line 66, in __init__
    self.data = _apply_transforms(data, self.aesthetics)
  File "/usr/local/sage-6.7/local/lib/python2.7/site-packages/ggplot-0.6.5-py2.7.egg/ggplot/ggplot.py", line 548, in _apply_transforms
    env.add_outer_namespace({"factor":factor})
AttributeError: 'EvalEnvironment' object has no attribute 'add_outer_namespace'
ggsave('Rec.2.3.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.2.4 ヒストグラムを作成する # binwidth='4'→binwidth=4.0が機能しない ggplot(mtcars, aes(x='mpg')) + geom_histogram(binwidth=4.0) 
       
<ggplot: (8769174542105)>
<ggplot: (8769174542105)>
ggsave('Rec.2.4.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rの結果と異なる! graph = preGraph("fig2.4.pdf") r('p <- ggplot(mtcars, aes(x=mpg)) + geom_histogram(binwidth=4)') r('plot(p)') postGraph(graph) 
       
# ToothGrowthデータをRから持ってくる ToothGrowth = RDf2PandaDf('ToothGrowth') ToothGrowth.head() 
       
   dose   len supp
0   0.5   4.2   VC
1   0.5  11.5   VC
2   0.5   7.3   VC
3   0.5   5.8   VC
4   0.5   6.4   VC
   dose   len supp
0   0.5   4.2   VC
1   0.5  11.5   VC
2   0.5   7.3   VC
3   0.5   5.8   VC
4   0.5   6.4   VC
# Rec.2.5 箱ひげ図を作成する # geom_boxplotはまだ実装されていないみたい # ggplot(ToothGrowth, aes(x='interaction(supp, dose)', y='len')) + geom_boxplot() 
       
#ggsave('Rec.2.5.png', dpi=50) 
       
graph = preGraph("fig2.5.pdf") r('p <- ggplot(ToothGrowth, aes(x=interaction(supp, dose), y=len)) + geom_boxplot()') r('plot(p)') postGraph(graph) 
       
# Rec.2.6 関数曲線をプロットする # stat_functionはまだ実装されていないみたい graph = preGraph("fig2.6.pdf") r('myfun <- function(xvar){ 1/(1 + exp(-xvar + 10)) }') r('p <- ggplot(data.frame(x=c(0, 20)), aes(x=x)) + stat_function(fun=myfun, geom="line")') r('plot(p)') postGraph(graph) 
       
# Rec.3.1 棒グラフを作成する pg_mean = pd.DataFrame({'group':['ctrl', 'trt1', 'trt2'], 'weight': [5.032, 4.661, 5.526]}) pg_mean.head() 
       
  group            weight
0  ctrl  5.03200000000000
1  trt1  4.66100000000000
2  trt2  5.52600000000000
  group            weight
0  ctrl  5.03200000000000
1  trt1  4.66100000000000
2  trt2  5.52600000000000
ggplot(pg_mean, aes(x='group', weight='weight')) + geom_bar() 
       
<ggplot: (8769174493181)>
<ggplot: (8769174493181)>
ggsave('Rec.3.1.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# R版と指定方法が異なるので、R版でも同じグラフを作成して比較する graph = preGraph("fig3.1.pdf") r('p <- ggplot(pg_mean, aes(x=group, y=weight)) + geom_bar(stat="identity")') r('plot(p)') postGraph(graph) 
       
# Fig3-2 BOD = RDf2PandaDf("BOD") BOD.head() 
       
   Time  demand
0     1     8.3
1     2    10.3
2     3    19.0
3     4    16.0
4     5    15.6
   Time  demand
0     1     8.3
1     2    10.3
2     3    19.0
3     4    16.0
4     5    15.6
# ggplotの場合、factor(Time)のようにプロットされる # stat="identity"が効かない ggplot(BOD, aes(x='Time', weight='demand')) + geom_bar() 
       
<ggplot: (8769174385521)>
<ggplot: (8769174385521)>
ggsave('fig.3.2a.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# aesのfactor(Time), geom_barのstat="identity"が効かない ggplot(BOD, aes(x='factor(Time)', weight='demand')) + geom_bar(stat="identity") 
       
Traceback (click to the left of this block for traceback)
...
AttributeError: 'EvalEnvironment' object has no attribute
'add_outer_namespace'
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "_sage_input_51.py", line 10, in <module>
    exec compile(u'open("___code___.py","w").write("# -*- coding: utf-8 -*-\\n" + _support_.preparse_worksheet_cell(base64.b64decode("IyBhZXPjga5mYWN0b3IoVGltZSksIGdlb21fYmFy44Guc3RhdD0iaWRlbnRpdHki44GM5Yq544GL44Gq44GECmdncGxvdChCT0QsIGFlcyh4PSdmYWN0b3IoVGltZSknLCB3ZWlnaHQ9J2RlbWFuZCcpKSArIGdlb21fYmFyKHN0YXQ9ImlkZW50aXR5Iik="),globals())+"\\n"); execfile(os.path.abspath("___code___.py"))
  File "", line 1, in <module>
    
  File "/tmp/tmpR_yR_4/___code___.py", line 3, in <module>
    exec compile(u'ggplot(BOD, aes(x=\'factor(Time)\', weight=\'demand\')) + geom_bar(stat="identity")
  File "", line 1, in <module>
    
  File "/usr/local/sage-6.7/local/lib/python2.7/site-packages/ggplot-0.6.5-py2.7.egg/ggplot/ggplot.py", line 66, in __init__
    self.data = _apply_transforms(data, self.aesthetics)
  File "/usr/local/sage-6.7/local/lib/python2.7/site-packages/ggplot-0.6.5-py2.7.egg/ggplot/ggplot.py", line 548, in _apply_transforms
    env.add_outer_namespace({"factor":factor})
AttributeError: 'EvalEnvironment' object has no attribute 'add_outer_namespace'
ggsave('fig.3.2b.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Fig3-3 # geom_barのstat="identity"が効かない ggplot(pg_mean, aes(x='group', weight='weight')) + geom_bar(fill="lightblue", colour="black") 
       
<ggplot: (8769173888185)>
<ggplot: (8769173888185)>
ggsave('fig.3.3.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.3.2 棒をグループ化する cabbage_exp = pd.DataFrame({'Cultivar': ['c39', 'c39', 'c39', 'c52', 'c52', 'c52'], 'Date': ['d16', 'd20', 'd21', 'd16', 'd20', 'd21'], 'Weight': [3.18, 2.8, 2.74, 2.26, 3.11, 1.47]}) cabbage_exp 
       
  Cultivar Date            Weight
0      c39  d16  3.18000000000000
1      c39  d20  2.80000000000000
2      c39  d21  2.74000000000000
3      c52  d16  2.26000000000000
4      c52  d20  3.11000000000000
5      c52  d21  1.47000000000000
  Cultivar Date            Weight
0      c39  d16  3.18000000000000
1      c39  d20  2.80000000000000
2      c39  d21  2.74000000000000
3      c52  d16  2.26000000000000
4      c52  d20  3.11000000000000
5      c52  d21  1.47000000000000
# 横並びができない。d20の積み重ねの色が変? #ggplot(cabbage_exp, aes(x='factor(Date)', weight='Weight', colour='Cultivar')) + geom_bar(position='dodge') ggplot(cabbage_exp, aes(x='Date', weight='Weight', fill='Cultivar')) + geom_bar() 
       
<ggplot: (8769173716461)>
<ggplot: (8769173716461)>
ggsave('Rec.3.2.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.3.2 棒をグループ化 graph = preGraph("fig3.4.pdf") r('p <- ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) + geom_bar(stat="identity", position="dodge")') r('plot(p)') postGraph(graph) 
       
# Rec.3.3 個数を示す棒グラフを作成する ggplot(diamonds, aes(x='cut')) + geom_bar() 
       
<ggplot: (8769173640493)>
<ggplot: (8769173640493)>
ggsave('Rec.3.3.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.3.4 色つきの棒グラフを作成する r('upc <- subset(uspopchange, rank(Change)>40)') graph = preGraph("Rec.3.4.pdf") r('p <- ggplot(upc, aes(x=Abb, y=Change, fill=Region)) + geom_bar(stat="identity")') r('plot(p)') postGraph(graph) 
       
# Rec.3.5 棒の正負によって色を塗り分ける # 値が正か負を示すpos列をデータフレームに追加する r('csub <- subset(climate, Source=="Berkeley" & Year >= 1900)') r('csub$pos <- csub$Anomaly10y >= 0') r('head(csub)') 
       
      Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y   pos
101 Berkeley 1900        NA        NA     -0.171  0.108 FALSE
102 Berkeley 1901        NA        NA     -0.162  0.109 FALSE
103 Berkeley 1902        NA        NA     -0.177  0.108 FALSE
104 Berkeley 1903        NA        NA     -0.199  0.104 FALSE
105 Berkeley 1904        NA        NA     -0.223  0.105 FALSE
106 Berkeley 1905        NA        NA     -0.241  0.107 FALSE
      Source Year Anomaly1y Anomaly5y Anomaly10y Unc10y   pos
101 Berkeley 1900        NA        NA     -0.171  0.108 FALSE
102 Berkeley 1901        NA        NA     -0.162  0.109 FALSE
103 Berkeley 1902        NA        NA     -0.177  0.108 FALSE
104 Berkeley 1903        NA        NA     -0.199  0.104 FALSE
105 Berkeley 1904        NA        NA     -0.223  0.105 FALSE
106 Berkeley 1905        NA        NA     -0.241  0.107 FALSE
graph = preGraph("Rec.3.5.png") r('p <- ggplot(csub, aes(x=Year, y=Anomaly10y, fill=pos)) + geom_bar(stat="identity", position="identity")') r('plot(p)') postGraph(graph) 
       
# Rec.3.6 棒の幅と間隔を調整する # 最大の幅1.0 # stat="identity"指定が効かない ggplot(pg_mean, aes(x='group', weight='weight')) + geom_bar(width=1.0) 
       
<ggplot: (8769173601385)>
<ggplot: (8769173601385)>
ggsave('Rec.3.6.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
r('cabbage_exp') 
       
  Cultivar Date Weight        sd  n         se
1      c39  d16   3.18 0.9566144 10 0.30250803
2      c39  d20   2.80 0.2788867 10 0.08819171
3      c39  d21   2.74 0.9834181 10 0.31098410
4      c52  d16   2.26 0.4452215 10 0.14079141
5      c52  d20   3.11 0.7908505 10 0.25008887
6      c52  d21   1.47 0.2110819 10 0.06674995
  Cultivar Date Weight        sd  n         se
1      c39  d16   3.18 0.9566144 10 0.30250803
2      c39  d20   2.80 0.2788867 10 0.08819171
3      c39  d21   2.74 0.9834181 10 0.31098410
4      c52  d16   2.26 0.4452215 10 0.14079141
5      c52  d20   3.11 0.7908505 10 0.25008887
6      c52  d21   1.47 0.2110819 10 0.06674995
# Rec.3.7 積み上げ棒グラフを作成する graph = preGraph("Rec.3.7.pdf") r('p <- ggplot(cabbage_exp, aes(x=Date, y=Weight, fill=Cultivar)) + geom_bar(stat="identity")') r('plot(p)') postGraph(graph) 
       
# Rec.3.8 100%積み上げ棒グラフ(Practical Data Science版) # yを割り当てる場合はstat="identity"を指定しないと上手く表示されない graph = preGraph("Rec.3.8.pdf") r('p <- ggplot(cabbage_exp) + geom_bar(aes(x=Date, y=Weight, fill=Cultivar), stat="identity", position="fill")') r('plot(p)') postGraph(graph) 
       
# Rec.3.9 棒グラフにラベルを追加する vjustでラベルの位置を調整 # python版はダメ # ggplot(cabbage_exp, aes(x='factor(Date)', weight='Weight')) + geom_bar() + geom_text(aes(y='Weight', label='Weight')) graph = preGraph("Rec.3.9.pdf") r('p <- ggplot(cabbage_exp, aes(x=interaction(Date, Cultivar) , y=Weight)) + geom_bar(stat="identity")+ geom_text(aes(label=Weight), vjust=1.5, colour="white")') r('plot(p)') postGraph(graph) 
       
#ggsave('Rec.3.9.png', dpi=50) 
       
# geom_textは実装されているが、文字列のプロットのみをサポート ggplot(aes(x='wt', y='mpg', label='name'), data=mtcars) + \ geom_text() 
       
<ggplot: (8769173601473)>
<ggplot: (8769173601473)>
ggsave('test1.0.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.3.10 クリーブランドのドットプロットを作成する r('tophit <- tophitters2001[1:25,]') # tophitters2001から上位25名を抽出 graph = preGraph("Rec.3.10.pdf") r('p <- ggplot(tophit, aes(x=avg , y=name)) + geom_point()') r('plot(p)') postGraph(graph) 
       
# PDSの手法で、X軸とY軸を入れ替えてみる graph = preGraph("fig-3.29.pdf") r('p <- ggplot(tophit, aes(x=avg , y=name)) + geom_point(size=3) + coord_flip() + theme(axis.text.x=element_text(angle=60, hjust=1))') r('plot(p)') postGraph(graph) 
       
# Rec.4.1 基本的な折れ線グラフを作成する(Python版) ggplot(BOD, aes(x='Time', y='demand')) + \ geom_line() 
       
<ggplot: (8769173371653)>
<ggplot: (8769173371653)>
ggsave('Rec.4.1.0.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.4.2 折れ線グラフに点を追加する(Python版) ggplot(BOD, aes(x='Time', y='demand')) + \ geom_line() + geom_point() 
       
<ggplot: (8769173323185)>
<ggplot: (8769173323185)>
ggsave('Rec.4.2.0.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Y軸に対数表示を加える(Python版) worldpop = RDf2PandaDf("worldpop") 
       
ggplot(worldpop, aes(x='Year', y='Population')) + \ geom_line() + geom_point() + scale_y_log() 
       
<ggplot: (8769173255973)>
<ggplot: (8769173255973)>
ggsave('fig-4.5.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.4.3 複数の線を持つ折れ線グラフを作成する(Python版) r('library(plyr)') # ToothGrowthデータを要約する r('tg <- ddply(ToothGrowth, c("supp", "dose"), summarise, length=mean(len))') 
       
  supp dose length
1   OJ  0.5  13.23
2   OJ  1.0  22.70
3   OJ  2.0  26.06
4   VC  0.5   7.98
5   VC  1.0  16.77
6   VC  2.0  26.14
  supp dose length
1   OJ  0.5  13.23
2   OJ  1.0  22.70
3   OJ  2.0  26.06
4   VC  0.5   7.98
5   VC  1.0  16.77
6   VC  2.0  26.14
tg = RDf2PandaDf("tg") 
       
ggplot(tg, aes(x='dose', y='length', colour='supp')) + \ geom_line() 
       
<ggplot: (8769173233133)>
<ggplot: (8769173233133)>
ggsave('Rec.4.3.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.4.4 線の体裁を変更する(Python版) Rではlinetypeで線種を指定 ggplot(BOD, aes(x='Time', y='demand')) + \ geom_line(linestyle="dashed", color="blue") 
       
Traceback (click to the left of this block for traceback)
...
ggplot.utils.exceptions.GgplotError: u'Cannot recognize argument:
linestyle'
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "_sage_input_94.py", line 10, in <module>
    exec compile(u'open("___code___.py","w").write("# -*- coding: utf-8 -*-\\n" + _support_.preparse_worksheet_cell(base64.b64decode("IyBSZWMuNC40IOe3muOBruS9k+ijgeOCkuWkieabtOOBmeOCiyhQeXRob27niYgp44CAUuOBp+OBr2xpbmV0eXBl44Gn57ea56iu44KS5oyH5a6aCmdncGxvdChCT0QsIGFlcyh4PSdUaW1lJywgeT0nZGVtYW5kJykpICsgXAogICAgZ2VvbV9saW5lKGxpbmVzdHlsZT0iZGFzaGVkIiwgY29sb3I9ImJsdWUiKQ=="),globals())+"\\n"); execfile(os.path.abspath("___code___.py"))
  File "", line 1, in <module>
    
  File "/tmp/tmpcLBMeL/___code___.py", line 3, in <module>
    exec compile(u'ggplot(BOD, aes(x=\'Time\', y=\'demand\')) + \\\n    geom_line(linestyle="dashed", color="blue")
  File "", line 2, in <module>
    
  File "/usr/local/sage-6.7/local/lib/python2.7/site-packages/ggplot-0.6.5-py2.7.egg/ggplot/geoms/geom_line.py", line 16, in __init__
    super(geom_line, self).__init__(*args, **kwargs)
  File "/usr/local/sage-6.7/local/lib/python2.7/site-packages/ggplot-0.6.5-py2.7.egg/ggplot/geoms/geom.py", line 102, in __init__
    raise GgplotError('Cannot recognize argument: %s' % k)
ggplot.utils.exceptions.GgplotError: u'Cannot recognize argument: linestyle'
ggsave('Rec.4.4.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.4.5 点の体裁を変更する(Python版) R版とはsizeの単位が異なる、記号の形(shape=22)とfillは指定不可 ggplot(BOD, aes(x='Time', y='demand')) + geom_line() + \ geom_point(size=100, color="darkred") 
       
<ggplot: (8769173008937)>
<ggplot: (8769173008937)>
ggsave('Rec.4.5.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.4.6 網掛け領域付きのグラフを作成する (Python版未完成) r('sunspotyear <- data.frame(Year = as.numeric(time(sunspot.year)), Sunspots = as.numeric(sunspot.year))') sunspotyear = RDf2PandaDf('sunspotyear'); sunspotyear.head() 
       
   Sunspots  Year
0         5  1700
1        11  1701
2        16  1702
3        23  1703
4        36  1704
   Sunspots  Year
0         5  1700
1        11  1701
2        16  1702
3        23  1703
4        36  1704
ggplot(sunspotyear, aes(x="Year", y="Sunspots")) + geom_line() 
       
<ggplot: (8769172999013)>
<ggplot: (8769172999013)>
ggsave('Rec.4.6.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# R版 graph = preGraph("fig-4.17.pdf") r('p <- ggplot(sunspotyear, aes(x=Year, y=Sunspots)) + geom_area()') r('plot(p)') postGraph(graph) 
       
# Rec.4.7 積み上げ面グラフを作成する(ダメ) uspopage = RDf2PandaDf('uspopage'); print uspopage.head() ggplot(uspopage, aes(x="Year", y="Thousands", fill="AgeGroup")) + geom_area() 
       
  AgeGroup  Thousands  Year
0       <5       9181  1900
1     5-14      16966  1900
2    15-24      14951  1900
3    25-34      12161  1900
4    35-44       9273  1900
<repr(<ggplot.ggplot.ggplot at 0x7f9bb6f5d4d0>) failed:
GgplotError: u'geom_area requires the following missing aesthetics:
ymin, ymax'>
  AgeGroup  Thousands  Year
0       <5       9181  1900
1     5-14      16966  1900
2    15-24      14951  1900
3    25-34      12161  1900
4    35-44       9273  1900
<repr(<ggplot.ggplot.ggplot at 0x7f9bb6f5d4d0>) failed: GgplotError: u'geom_area requires the following missing aesthetics: ymin, ymax'>
ggsave('Rec.4.7.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# R版 graph = preGraph("fig-4.20.pdf") r('p <- ggplot(uspopage, aes(x=Year, y=Thousands, fill=AgeGroup)) + geom_area() ') r('plot(p)') postGraph(graph) 
       
# Rec.4.9 信頼区間の領域を追加する # Anomaly10y: 1950~1980年までの平均気温からの偏差の10年移動平均 # Unc10y: 95%の信頼区間 r('clim <- subset(climate, Source == "Berkeley", select=c("Year", "Anomaly10y", "Unc10y"))') clim = RDf2PandaDf('clim') 
       
# 上限、下限の線で代用 up_line = pd.DataFrame({ 'y': (clim.Anomaly10y + clim.Unc10y).tolist(), 'x': clim.Year.tolist()}) lw_line = pd.DataFrame({ 'y': (clim.Anomaly10y - clim.Unc10y).tolist(), 'x': clim.Year.tolist()}) 
       
# linestyle="dashed"が効かない ggplot(clim, aes(x="Year", y="Anomaly10y")) + \ geom_line() + \ geom_line(aes(x="x", y="y"), color="blue", data=up_line) + \ geom_line(aes(x="x", y="y"), color="blue", data=lw_line) 
       
<ggplot: (8769172840921)>
<ggplot: (8769172840921)>
ggsave('Rec.4.9.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# R版 alpha指定が効かない graph = preGraph("fig-4.25.pdf") r('p <- ggplot(clim, aes(x=Year, y=Anomaly10y)) + geom_ribbon(aes(ymin=Anomaly10y-Unc10y, ymax=Anomaly10y+Unc10y)) + geom_line()') r('plot(p)') postGraph(graph) 
       
# Rec.5.1 基本的な散布図を作成する ggplot(heightweight, aes(x="ageYear", y="heightIn")) + geom_point() 
       
<ggplot: (8769172800873)>
<ggplot: (8769172800873)>
ggsave('Rec.5.1.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.5.2 色と形を使用してデータポイントをグループ化 ggplot(heightweight, aes(x="ageYear", y="heightIn", color="sex")) + geom_point() 
       
<ggplot: (8769172752329)>
<ggplot: (8769172752329)>
ggsave('fig-5.4a.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
ggplot(heightweight, aes(x="ageYear", y="heightIn", shape="sex")) + geom_point() 
       
<ggplot: (8769172790025)>
<ggplot: (8769172790025)>
ggsave('fig-5.4b.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.5.3 点の形を指定する ggplot(heightweight, aes(x="ageYear", y="heightIn")) + geom_point(shape=3) 
       
<ggplot: (8769172609373)>
<ggplot: (8769172609373)>
# R版 graph = preGraph("Rec.5.3.pdf") r('p <- ggplot(heightweight, aes(x=ageYear, y=heightIn)) + geom_point(shape=3)') r('plot(p)') postGraph(graph) 
       
# Rec.5.4 連続値変数を色やサイズにマッピングする ggplot(heightweight, aes(x="ageYear", y="heightIn", colour="weightLb")) + geom_point() 
       
<ggplot: (8769172585881)>
<ggplot: (8769172585881)>
ggsave('Rec.5.4.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
ggplot(heightweight, aes(x="ageYear", y="heightIn", size="weightLb")) + geom_point() 
       
<ggplot: (8769172538865)>
<ggplot: (8769172538865)>
ggsave('fig-5.9.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.5.5 オーバープロットを扱う ggplot(diamonds, aes('carat', 'price')) + \ geom_point(alpha=1/20.) + \ ylim(0, 20000) 
       
<ggplot: (8769172410877)>
<ggplot: (8769172410877)>
ggsave('Rec.5.5.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.5.6 回帰モデルの直線をフィットさせる ggplot(heightweight, aes(x="ageYear", y="heightIn")) + geom_point() + stat_smooth(method="lm", se=True) 
       
<ggplot: (8769172388325)>
<ggplot: (8769172388325)>
ggsave('Rec.5.6.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
ggplot(heightweight, aes(x="ageYear", y="heightIn")) + geom_point() + stat_smooth() 
       
<ggplot: (8769172362685)>
<ggplot: (8769172362685)>
ggsave('fig-5.19.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rの結果は graph = preGraph("fig-5.19.pdf") r('p <- ggplot(heightweight, aes(x=ageYear, y=heightIn)) + geom_point() + stat_smooth()') r('plot(p)') postGraph(graph) 
       
# glmでのスムーズ曲線 r('library(MASS)') graph = preGraph("fig-5.20.pdf") r('b <- biopsy') r('b$classn[b$class == "benign"] <- 0') r('b$classn[b$class == "malignant"] <- 1') r('p <- ggplot(b, aes(x=V1, y=classn)) + geom_point(position=position_jitter(width=0.3, height=0.06)) + stat_smooth(method=glm, family=binomial)') r('plot(p)') postGraph(graph) 
       
# 不要な散布図のレシピは省略 
       
# Rec.6.1 基本的なヒストグラムを作成する faithful = RDf2PandaDf('faithful') ggplot(faithful, aes(x="waiting")) + geom_histogram() 
       
<ggplot: (8769172254553)>
<ggplot: (8769172254553)>
ggsave('Rec.6.1.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# binwidthを1.8に指定 ggplot(faithful, aes(x="waiting")) + geom_histogram(binwidth=1.8) 
       
<ggplot: (8769171613869)>
<ggplot: (8769171613869)>
ggsave('fig-6.2.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.6.2 グループ化されたデータから複数のヒストグラムを作成する birthwt = RDf2PandaDf('birthwt') ggplot(birthwt, aes(x="bwt")) + geom_histogram(color="grey") + facet_wrap("smoke") 
       
<ggplot: (8769171329417)>
<ggplot: (8769171329417)>
ggsave('fig-6.4.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.6.3 密度曲線を作成する ggplot(faithful, aes(x="waiting")) + geom_density() 
       
<ggplot: (8769171106017)>
<ggplot: (8769171106017)>
ggsave('fig-6.3.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.6.4 グループ化されたデータから複数の密度曲線を作成する ggplot(birthwt, aes(x="bwt", colour="smoke")) + geom_density() 
       
<ggplot: (8769171036313)>
<ggplot: (8769171036313)>
ggsave('Rec.6.4.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.6.6 基本的な箱ひげ図を作成する graph = preGraph("Rec.6.6.pdf") r('p <- ggplot(birthwt, aes(x=factor(race), y=bwt)) + geom_boxplot()') r('plot(p)') postGraph(graph) 
       
# Rec.6.12 2次元データから密度プロットを作成する graph = preGraph("Rec.6.12.pdf") r('p <- ggplot(faithful, aes(x=eruptions, y=waiting)) + geom_point() + stat_density2d()') r('plot(p)') postGraph(graph) 
       
# Rec.9.3 テーマを使う # ブラックとホワイトのテーマ ggplot(heightweight, aes(x="ageYear", y="heightIn")) + geom_point() + theme_bw() 
       
<ggplot: (8769171118737)>
<ggplot: (8769171118737)>
ggsave('Rec.9.3.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# Rec.11.1 ファセットを使いサブプロットに分割する mpg = RDf2PandaDf('mpg') ggplot(mpg, aes(x="displ", y="hwy")) + geom_point() + facet_grid("drv", "cyl") 
       
<ggplot: (8769170935665)>
<ggplot: (8769170935665)>
ggsave('Rec.11.1.png', dpi=50) 
       
Saving 11.0 x 8.0 in image.
Saving 11.0 x 8.0 in image.
# R版では、水平、垂直パネルにも分割できる graph = preGraph("fig-11.1.pdf") r('p <- ggplot(mpg, aes(x=displ, y=hwy)) + geom_point()') r('p <- p + facet_grid(drv ~ .)') #r('p + facet_grid(. ~ cyl)') #r('p + facet_grid(drv ~ cyl)') r('plot(p)') postGraph(graph) 
       
# 地図のプロット # r('install.packages("mapproj")') r('library(maps)') 
       
 [1] "maps"      "gcookbook" "ggplot2"   "stats"     "graphics" 
"grDevices" "utils"     "datasets" 
 [9] "methods"   "base"     
 [1] "maps"      "gcookbook" "ggplot2"   "stats"     "graphics"  "grDevices" "utils"     "datasets" 
 [9] "methods"   "base"     
# アメリカの地図データを取得 junk = r('states_map <- map_data("state")') 
       
r('class(states_map)') graph = preGraph("fig-13.32.pdf") r('p <- ggplot(states_map, aes(x=long, y=lat, group=group)) + geom_polygon(fill="white", colour="black")') r('plot(p)') postGraph(graph) 
       
# 世界地図から日本と韓国、中国をプロット r('world_map <- map_data("world")') junk = r('east_asia <- map_data("world", region=c("Japan", "China", "North Korea", "South Korea"))') 
       
graph = preGraph("fig-13.33.pdf") r('p <- ggplot(east_asia, aes(x=long, y=lat, group=group, fill=region)) + geom_polygon( colour="black") + scale_fill_brewer(palette="Set2")') r('plot(p)') postGraph(graph) # グラフがゆがんでいるのは、要チェックです。 
       
# 塗り分け地図(コロプレス地図) r('states_map = map_data("state")') r('crimes <- data.frame(state = tolower(rownames(USArrests)), USArrests)') r('crime_map <- merge(states_map, crimes, by.x="region", by.y="state")') graph = preGraph("fig-13.35.pdf") r('p <- ggplot(crime_map, aes(x=long, y=lat, group=group, fill=Assault)) + geom_polygon( ) + coord_map("polyconic")') r('plot(p)') postGraph(graph) 
       
# 日本地図 # r('install.packages("raster")') r('library(raster)') # シェープファイルを読み込む場合は、readShapePoly関数を使用する 
       
r('japan_shp <- getData("GADM", country="JPN", level=1)') r('japan_map <- fortify(japan_shp)') graph = preGraph("fig-13.40.pdf") r('p <- ggplot(japan_map, aes(x=long, y=lat, group=group)) + geom_path(lwd=0.5)') r('plot(p)') postGraph(graph) # PDFからPNGへのconvertにとても時間がかかります。 
       
# arrow用にgridパッケージをロード r('library(grid)') # プロットデータの加工 junk = r('islice <- subset(isabel, z == min(z))') 
       
# ベクトルフィールドのプロット # 動かなくなっていた(library の綴りと arrow() の閉じ括弧を修正) graph = preGraph("fig-13.22.pdf") r('p <- ggplot(islice, aes(x=x, y=y)) + geom_segment(aes(xend = x+vx/50, yend = y+vy/50), arrow = arrow(length = unit(0.1, "cm")), size = 0.25)') r('plot(p)') postGraph(graph) 
       
   **** Warning:  Invalid Page count.
   **** Warning:  Invalid Page count.

   **** This file had errors that were repaired or ignored.
   **** The file was produced by: 
   **** >>>> R 3.2.0 <<<<
   **** Please notify the author of the software that produced this
   **** file that it does not conform to Adobe's published PDF
   **** specification.

convert: Postscript delegate failed `fig-13.22.pdf':
そのようなファイルやディレクトリはありません @ error/pdf.c/ReadPDFImage/664.
convert: missing an image filename `fig-13.22.png' @
error/convert.c/ConvertImageCommand/3015.
   **** Warning:  Invalid Page count.
   **** Warning:  Invalid Page count.

   **** This file had errors that were repaired or ignored.
   **** The file was produced by: 
   **** >>>> R 3.2.0 <<<<
   **** Please notify the author of the software that produced this
   **** file that it does not conform to Adobe's published PDF
   **** specification.

convert: Postscript delegate failed `fig-13.22.pdf': そのようなファイルやディレクトリはありません @ error/pdf.c/ReadPDFImage/664.
convert: missing an image filename `fig-13.22.png' @ error/convert.c/ConvertImageCommand/3015.