3 υ_$@sddlmZmZmZddlmZmZmZddlm Z m Z m Z ddl m Z mZmZddlmZmZmZGdddeZGdd d eZGd d d eZGd d d eZGdddeZGdddeZGdddeZdS))EUCTW_CHAR_TO_FREQ_ORDEREUCTW_TABLE_SIZE EUCTW_TYPICAL_DISTRIBUTION_RATIO)EUCKR_CHAR_TO_FREQ_ORDEREUCKR_TABLE_SIZE EUCKR_TYPICAL_DISTRIBUTION_RATIO)GB2312_CHAR_TO_FREQ_ORDERGB2312_TABLE_SIZE!GB2312_TYPICAL_DISTRIBUTION_RATIO)BIG5_CHAR_TO_FREQ_ORDERBIG5_TABLE_SIZEBIG5_TYPICAL_DISTRIBUTION_RATIO)JIS_CHAR_TO_FREQ_ORDERJIS_TABLE_SIZEJIS_TYPICAL_DISTRIBUTION_RATIOc@sLeZdZdZdZdZdZddZddZd d Z d d Z d dZ ddZ dS)CharDistributionAnalysisigGz?g{Gz?cCs0d|_d|_d|_d|_d|_d|_|jdS)N)_char_to_freq_order _table_sizetypical_distribution_ratio_done _total_chars _freq_charsreset)selfr&/usr/lib/python3.6/chardistribution.py__init__.sz!CharDistributionAnalysis.__init__cCsd|_d|_d|_dS)zreset analyser, clear any stateFN)rrr)rrrrr=szCharDistributionAnalysis.resetcCsX|dkr|j|}nd}|dkrT|jd7_||jkrTd|j|krT|jd7_dS)z"feed a character with known lengthrriN) get_orderrrrr)rcharZchar_lenorderrrrfeedFs  zCharDistributionAnalysis.feedcCsT|jdks|j|jkr|jS|j|jkrN|j|j|j|j}||jkrN|S|jS)z(return confidence based on existing datar)rrMINIMUM_DATA_THRESHOLDSURE_NOrSURE_YES)rrrrrget_confidenceTs   z'CharDistributionAnalysis.get_confidencecCs |j|jkS)N)rENOUGH_DATA_THRESHOLD)rrrrgot_enough_datadsz(CharDistributionAnalysis.got_enough_datacCsdS)Nrr r)rbyte_strrrrr!isz"CharDistributionAnalysis.get_orderN) __name__ __module__ __qualname__r*r'r&r%rrr$r)r+r!rrrrr(s rcs$eZdZfddZddZZS)EUCTWDistributionAnalysiscs$tt|jt|_t|_t|_dS)N) superr0rrrrrrr)r) __class__rrrrsz"EUCTWDistributionAnalysis.__init__cCs0|d}|dkr(d|d|ddSdSdS)Nr^rr r)rr, first_charrrrr!xsz#EUCTWDistributionAnalysis.get_order)r-r.r/rr! __classcell__rr)r2rr0qs r0cs$eZdZfddZddZZS)EUCKRDistributionAnalysiscs$tt|jt|_t|_t|_dS)N) r1r8rrrrrrr)r)r2rrrsz"EUCKRDistributionAnalysis.__init__cCs0|d}|dkr(d|d|ddSdSdS)Nrr4rr5r r)rr,r6rrrr!sz#EUCKRDistributionAnalysis.get_order)r-r.r/rr!r7rr)r2rr8s r8cs$eZdZfddZddZZS)GB2312DistributionAnalysiscs$tt|jt|_t|_t|_dS)N) r1r:rrrr rr r)r)r2rrrsz#GB2312DistributionAnalysis.__init__cCs>|d|d}}|dkr6|dkr6d|d|dSdSdS)Nrrr9r5r4r r)rr,r6 second_charrrrr!sz$GB2312DistributionAnalysis.get_order)r-r.r/rr!r7rr)r2rr:s r:cs$eZdZfddZddZZS)Big5DistributionAnalysiscs$tt|jt|_t|_t|_dS)N) r1r<rr rr rr r)r)r2rrrsz!Big5DistributionAnalysis.__init__cCsX|d|d}}|dkrP|dkr:d|d|ddSd|d|dSndSdS) Nrrr5?@r r)rr,r6r;rrrr!s z"Big5DistributionAnalysis.get_order)r-r.r/rr!r7rr)r2rr<s r<cs$eZdZfddZddZZS)SJISDistributionAnalysiscs$tt|jt|_t|_t|_dS)N) r1rArrrrrrr)r)r2rrrsz!SJISDistributionAnalysis.__init__cCsr|d|d}}|dkr0|dkr0d|d}n&|dkrR|dkrRd|dd}nd S||d }|d krnd }|S) Nrrr@r r r)rr,r6r;r#rrrr!s z"SJISDistributionAnalysis.get_order)r-r.r/rr!r7rr)r2rrAs rAcs$eZdZfddZddZZS)EUCJPDistributionAnalysiscs$tt|jt|_t|_t|_dS)N) r1rIrrrrrrr)r)r2rrrsz"EUCJPDistributionAnalysis.__init__cCs0|d}|dkr(d|d|ddSdSdS)Nrr4r5rr r)rr,r"rrrr!sz#EUCJPDistributionAnalysis.get_order)r-r.r/rr!r7rr)r2rrIs rIN)Z euctwfreqrrrZ euckrfreqrrrZ gb2312freqrr r Zbig5freqr r r Zjisfreqrrrobjectrr0r8r:r<rArIrrrrsI