A Novel Approach for Prediction of Protein Subcellular Localization from Sequence Using Fourier Analysis and Support Vector Machines Zhengdeng Lei
Department of Bioengineering University of Illinois at Chicago 851 S. Morgan Street Chicago, IL 60607, USA
[email protected] [email protected] );*+,$7 < )
(), -+ $ =,> (. $,/& ?0 @!1,$A& 2 !"3B=)CDEA$#F)G, $(=46$5H &%!"),&!')-,7?&,&& 9!8(, : G,&S!1&I( U 4I 39()MV,&$$,& NJ& &,&W!D$G86, (@' ,)*$ ($IA9O8=($,&, 7 98X K,$ -$P*,Q8-!R,L,S T, 35))&H)$ : B !1$CM&N (I) (,G-N, GS,$!", (.,4# Y& !HZ BL3))CM+&Q$VN &,G$&,#,&,' @VP,&!"$G&LY)ET Y# ,' P(I$AN&(!;))*:: $$9#4X,PK$9$ !",&) 7 98`,( +G,&G ,& 980V !"\,&$[S]/^Ia_!R,$&7`,/@S+)$) : ,,7*+b )[d'c ^):f!"&+ (NW !1$)&& LSL:q &kh,&m V(j'*,/noGCh,& :Oy&p BJ vd @#,(8sP*+F+{$E !K` 27,#$ ()4& Ge,/& !ozS8IS, &,&@$)s$Gb7,&_(kh(m)j',&no =Ch:Op : $&L P_(T,&+ B
ABSTRACT
k?5 &)(|ECymG),*+$S!1& ) 6 (,Ji6m}y)$+,& (. ,~o $ZJINtE6G,&, $A:d 4 B ,>e*7,$(,J
Keywords
E 4&,&INTRODUCTION $ = V)T&) I&*+ $M?,&!;4 I4&'&, 7_h 3&)$B 4ECM,A G&,:: S)$)&!,,PsQ*+L&!%9{7&$K, ( *3G$),&& Q:ds)!K* ' 3) &,/($$&&) QJH(,&7,4GJ+,9*7!1E),&7,$A: 6[dm,T6 LnoVC'_JDvqG,A (HGk; ,&#&J?!Dk;,/nLG,'jQ mvS#C H^N34L)&-$G$Fh(G3!",&)$AMJ X$ L,)*+ 4s)$'( I#!"& ! *+)*+W$ ()> (,L + $ s,& *. ,&, &Y -Y c7-7)Ic&&7!hc&/fIBML_:fA 7 ,& D, >&N, (4 4 ,/$A,& X(94s] I cx]fB ] -c&-c cIc&-c7M,,&h$$-)G,&$8-4 !"NM ,A $ ,&& !y. ,/( &$Bh CM#$ ,&A : !"$& V J* &(So4N&*>)AGS,&H AA;,&J &,&$GH,&&!+gT$A4G, @,>8-T*+,&L,N ),&9$GG,&** (8 ,& y ($$,&& o. ,!" ,QB +kH$m9{j'$Lno Cz$ ,L (,&.,6 FP, & ,N N4&&!=A)($G$L6,V)$$$&HS!"!1)
S87$S,/- 6$ 9,&8IB`%)N(4&S>G, (&4 +)I %0 : $Q4#7,/$G, @N!1,&&7 s+ @gMA !") ; / @8X,&$A!L( I ,/S$ ,4_`,&e&,G, 4-$G(3)B vO&!hSI)7)8*+J $gh' ) (,;$ *+$ ', (,Q. ,/gb V,!K ,&$G&!"&H- T3)$$=) S : 4#5&)',&7, 98',&EQ!" ,/)-FG,&$4& ;,&_):
pHvj'QNQ;_ P 6Q,/G,_4V(6ph !1& @,$-["gT9YmvSt'QN &!1$/^
7,4 ]&]
+,@ VFY G,,&$$$'K&0I @$A&L @,&$GB 53) &)$|s6!1,& 7 (L, 9% 8/ Ec 6 ,&7,&,P7[d\*+c ^, $AyJ&gMM *7,, N @4 &?*7,&,& i, ;-,&$$(@($ 4' +&9(-&y*Bo,&5 &,V7_ H,&eT&*+!o:d+P-)( H@!K!" 3Es))N($84B? CMI$o,& 6$P!?(,&$4N(Y$G_$ !" & H K(+Io&,9]f&B F : j'(&,%)\N fBX!vd)V*+s$V( ' () ( ,&,Y )7 S,$G,,&e . X,,/4>*,(&8X(SY-!"khWmj'tE)noG 9Ch,& :O&pb:d!E!",e4 Nx/,:f!1 >- (e0*7$, $$S::: &,&$ ( ,/D ,&7$,/(;,/$A: OghHBo }Z4 V8:&9!c ; H()? 3) @$,&= =SH 3)) V*+!E^h[S]/)^E*+$& () (Y,T $$ &,&7 . ,,/8Y(&SJi),$A7)J;E[d[ c&Z^E$ ,$8 : &!?N$4 7,L*+6SG,* iB !1vOoe,&3$G&I)GQ$A88QJh_,Y!>$$AH&>$,P!(c3,)]Y I$gT(V 9gTI9,LW+ &V-* M9:q +73,/)9( i B &!' se, 3)$,$(JN9B\sms(,&$_ z6%S8 V*+W ] As_^&X!- @,e8`&,4 + ,/,&
gT!1c&%&7 S8/ Vgu*+-gT&{ (9PYSQ&,!-,& W4P,&6 ,$&(!?Y`,$4 Y>P,X ,&,0 ( #4G,&,&$ Q(sQ!@!1 3c )]&Bl$ &!o$ZB@G-; >>`$AgT8e& /a4_$>&@$N,#I$ PBH,IH,$AT#+! +& K, *+[S ] _ *O8 6^ [f>^''(NW6L, Ve*_G,&E ;$ *7,, (. ,/e ` c$+c&[ ZJf STS2R\^]b!"
=> G],/, @QS!1]?&CE4I @ ,&&4E=!1J;&T9M* @,, I84&G,&9 {7 ;$ ,A: ]) @ 66A*6 (6^ 2E,&!MI&V&,&)4>( ,. >,-$ &X SG,&{Q e! KV,&L0*+/ / >&BCM6*A8P!"/ I JD),& ,$$Ae& b$ ,&,&$ $. ),G,&&$A08a!-& BEx s /> h/>TN ) ) 9L!;kHmj'noCzvAB !1)$ 6 ph ( oh V9:d+E A(M,&7-*7, @5? 5?@C0*7E,,& V I,&$(@$J gM T+,& (9 : ED[%o^'9 I ! / 1?[%d^ ?[%;^ 2( 9 I ! / #[%1 3;^ 2( KVL K/L ZBVCMF+ Lg ,&Q$, W&)Q)46,Px/:f!1 (W$&S: 5&V )4 G,( -L&!EAm{7~Q9_ _,/$( eK*,&$ AK,%[f, ),( (z,*( ,/5!47]68&68>9:=$=$=?@9BA8gTCED$96z?CED$FFG+;>^N 6G,L ( 98aF+A BM %vd' $&$ ,& . S,: ,&#$,& h[RE+&9( Q ($A(&6Mqt &,& )/^A $,& R 9 Mut E 8
&4 WB| CM$ ,Y (.,,/G ,&\#$&&% c 0$s8] &>c (, ($&JNc 0(_# ,/P , : *$G ,& () ( ,J cT 9I+,&9 (,& ,& $ 98`J $x c-G&,&)M,Y V*&G!-,&]&J]_,&Y&] IVAFGS, : gT["AZ B;Mqt`4 [",>y/2B ^;M18y^ &(L(h/L^ABo$ CM L) I$,&H @,L7,/V G,&$ 4&, 7h&!+!ZLTx/ :f,!"& () K;$!1& S:
,&$(4 J?ShP& 4&3)!M$ VV{7(o, H5?,G5?,&CAho$,&*+&X)o> &3 )x BB c x c BB >x c x&BB x& x ]&BBc&]/c >BB &BB &BB k=j')T (,& V$ *G, >]&BB c] &xBBx&]/c >xBB >c& x&BB & >x&xBB & &xBB c& &] BB &x&BB ;& F+G,V $9 () (,? Q B@ )4x&m~QB gT9_>cB x N !" ,/)x& B x&!K >Qc,B] ,$ (_B &$ & + 9B B 5?5?9:dC+KNV ;N #A)4@Pm)~Q 4@gTm9~Q_gT'9!1_ ,)E!" ,/!1)& lT!1Q& 5?5?CN&!?9:d8+&(E&*!1$39O)8@$8i$ (4 kHmj'noCh:qp\N4G, _ $L(X /fBhCMN) 9E,N!K tN,G8 /fB ghCM,G ,Q $,& ) ;6, !" gT S I?!1& ]'_]/xIgT9s,&6$ G,& i.SI4E&V)4&$I& 7M,9&:d+IgT9KIh!1H3)9:d+$A8V,(H>,&$$G,&4Eo ,'4 i!K: J $,& 98@ (G,,/4& LV)!7 V*+?H*+&!y!", ,/) B;CMB;X 5?'5?C0&+ $'IE!1 3 ) /gT$4 )4 s, ( i Bo($vOV&, ' _ @9,&6$G y,GJ,&,EL !" R9B : };;!1& l76]Q76 IgT9w}:9.} 6 cI!"&E ,$APSyB CMCMsA '5?75?,/C0N*7,!MkH Imj' noCh:qpzI!"&' -SG, ($A '6 !1&L/>Q h'&P V'*$G&,E (+&$,&7 . (,4I Z$Ji,&!1 D M!1&&7 6 /B > o& VM*(, $@>gTM9+PA!"&E @,9:d+$&B;CM(E;*7$,,&IP*+ h , (. @*, 88 #6 766 @P!1!1&&E- 3) 77$,h$,& IX*h9:dE+(!"( @#, $@!1 & 3)'5?$5?CB A? e,& V$G*G,> ;W,7)Q*SFG,G, K$, ; () (,; />$ , (.,/N eo,$=,&$ o !"&>c 2^ $`'h @4,$A!1)($= !"7,h =&:: $ STU>m O W U m 9J=] x Jy] xB c &$J&Wx >!hx :q>x &B e)*+$ ) (,/Q $ , Z B O W U m K o 9J 9] /-}y J B(J;ZJTBJ yJ-BJS&yJ B,7@' * J k;B=[dc >c& ^E K,_9{7 @$ ,/,/$A(&YymBQ &)4Q,& N>A T(!1&&z'N&!>c "!$# S%(",& &(fJ;' -!)*B=) , 70,m+7#,&gM"A.:OCD- , 8 # &/ %0J 21BMJ [d c ,&& I>*^ (4& `E9: 7,&+A B DCD,+ ,/L(6ph !1& @,$B >9q8_k?B 9] /I86o ,&c"S3kmOJE8C'oBNVPM[SU] DGrR^ Q IGkW i$jlKIk>SmS k>j pLI0UBK4c"_3hk>oEiBeGo U7U ]JD+3BEU7hpho3rrT WajBiNMW V g h| x ,&tI)B[d c& &&>y^=Jk?jIB JE$ 4EyJ)I*+$BJpH )) (,?,& +J$ ,mi B(.,&,# #!'&9:: _b 8VUm B STU mo fO*7,W U m 9VJi &&VJD] 9o x/E:]:f] B ,& ,& N,&$( 3)$ *B $ c&/I_ JB WB };B(JE N
,> *ZJ/IB/,7QEgh (A J n[dc ^ $ 4_ ,P7,/S (,^6@k? &$ ,& (u. ,/(&&yBPJ4ap F4 ]J,&3 YUGV,&o3WajWb 5Kl o($A 9: J xJ+c &/:q&&B /Wqc p7^;r;E:5o3r&,/( ( c^&khm(j''no,C'76;,W 4&$G, 4 c&/j p) G,&oyN
j oV%Wqh3rJix&J+c /:O&>cB c-CD)4@7,, (8J+@tI,&B $ (6Bi,&$&s +m 9&y J7YvGBD&!;[S] &>4^EGk?,& = $ I (*G',4 /N>A&:: c&&J>o ,/:Ox& ( $,/B P- +& (&4&8#$ Z$B $ 0STU m fO WqU>m J c-CDG),&7, 8 VJE*tIGB,&TBT%,7&+ m ( 48w&yJTvG BN([d$Ac&&(& ] ^_CMA>Y Q[B sO W 6U WaCTj0X jNU k N _kGV%W hyrJZ]/J+>:q x&B & h5&,&)yJWMBGJ,}ZS(Z!"J =B mi,BN,&7 ,&$Gi!"&,&M4J & B C'(@B$[S] 4-^W ,&)g *7 ,x/:q @& B L!"& @,/M!D@ _$)> GB O W U>WYj0X U_kGV%W hyrJD]J
pHvj'QNQ;_ P 6Q,/G,_4V(6ph !1& @,$-["gT9YmvSt'QN &!1$/^
7,4 ]
Hydrophobicity
Periodogram
1
1
0.8
0.9
0.6
0.8
0.4
0.7
0.2
0.6
0
0.5
−0.2
0.4
−0.4
0.3
−0.6
0.2
−0.8
0.1
−1
0
50
100
150 AA
200
250
300
0
0
200
400
600
800
Hydrophobicity
1000 cycles/AA
1200
1400
1600
1800
2000
1200
1400
1600
1800
2000
1200
1400
1600
1800
2000
1200
1400
1600
1800
2000
Periodogram
1
1.4
0.8 1.2
0.6 1
0.4
0.2 0.8
0 0.6
−0.2
−0.4
0.4
−0.6 0.2
−0.8
−1
0
50
100
150
200
250
0
0
200
400
600
800
1000 cycles/AA
5=4 )IcHCMN$ _ 3)$ !1&TOghAE V*G,LT*+!"@[R (A!1A^ ,&_,!KI["(4&A^HN!R,SE5 )(ALG,S!1& 6B AA
Hydrophobicity
Periodogram
1
0.9
0.8
0.8
0.6
0.7
0.4 0.6
0.2 0.5
0 0.4
−0.2 0.3
−0.4 0.2
−0.6
0.1
−0.8
−1
0
100
200
300
400
500
600
700
0
0
200
400
600
800
AA Hydrophobicity
1000 cycles/AA Periodogram
1
1.4
0.8 1.2
0.6 1
0.4
0.2 0.8
0 0.6
−0.2
−0.4
0.4
−0.6 0.2
−0.8
−1
0
100
200
300
400
500
600
700
800
900
0
0
200
400
600
800
1000 cycles/AA
5=(4&)-oCMQ$ _3)$M!"EqgM&)L I*G,&E&L*+A!"&@[R !KA^T,7P,!1AV["4 A^HN!R,&SE5&)TG,&S!" 6B AA
pHvj'QNQ;_ P 6Q,/G,_4V(6ph !1& @,$-["gT9YmvSt'QN &!1$/^
7,4 ]