Algoritma Stemming Nazief dan Adriani
Algoritma Nazief dan Adriani untuk melakukan stemming teks berbahasa Indonesia
/**
 * Indonesian stemmer following the Nazief & Adriani algorithm.
 *
 * Word model:  DP + DP + DP + root word + DS + PP + P
 *   DP : derivation prefix  (di-, ke-, se-, te-, be-, me-, pe-)
 *   DS : derivation suffix  (-i, -an, -kan)
 *   PP : possessive pronoun (-ku, -mu, -nya)
 *   P  : particle           (-lah, -kah, -pun)
 *
 * Root words are looked up in the `tb_katadasar` table through the DB facade.
 * All pattern matching is case-sensitive and expects lower-case input.
 */
class Stemmer {
    /**
     * Tell whether a word is a root word listed in the dictionary table.
     *
     * @param string $kata Word to look up.
     * @return bool True when a row with katadasar = $kata exists.
     */
    public function cekKamus($kata){
        $row = DB::table('tb_katadasar')->where('katadasar', $kata)->first();
        return (bool) $row;
    }

    /**
     * Remove inflection suffixes: one particle (-lah, -kah, -pun) and/or one
     * possessive pronoun (-ku, -mu, -nya).
     *
     * No dictionary lookup is done here, so the result is purely pattern based.
     * The particle "-tah" is not handled.
     *
     * @param string $kata Word to process.
     * @return string The word without its inflection suffixes, or the word
     *                unchanged when it carries none.
     */
    public function Del_Inflection_Suffixes($kata){
        $inflection = '/([km]u|nya|[kl]ah|pun)$/';
        if (!preg_match($inflection, $kata)) {
            return $kata;
        }

        $stripped = preg_replace($inflection, '', $kata);

        // When the suffix removed was a particle, a possessive pronoun may now
        // be exposed at the end of the word (e.g. "bukukulah" -> "bukuku").
        if (preg_match('/([kl]ah|pun)$/', $kata)) {
            return preg_replace('/([km]u|nya)$/', '', $stripped);
        }

        return $stripped;
    }

    /**
     * Detect prefix/suffix pairs that cannot occur together on one root:
     * be-…-i, di-…-an, ke-…-i, ke-…-kan, me-…-an, se-…-i, se-…-kan.
     *
     * @param string $kata Word to inspect.
     * @return bool True when the word matches one of the disallowed pairs.
     */
    public function Cek_Prefix_Disallowed_Sufixes($kata){
        $disallowed = '/^(?:be[[:alpha:]]+i'
            . '|di[[:alpha:]]+an'
            . '|ke[[:alpha:]]+(?:i|kan)'
            . '|me[[:alpha:]]+an'
            . '|se[[:alpha:]]+(?:i|kan))$/';

        return preg_match($disallowed, $kata) === 1;
    }

    /**
     * Remove a derivation suffix (-i, -an or -kan), but only when the
     * remainder is a dictionary root.
     *
     * "-i"/"-an" is tried first; "-kan" is tried second because every word
     * ending in "-kan" also ends in "-an".
     *
     * @param string $kata Word to process.
     * @return string The dictionary root found, otherwise $kata unchanged.
     */
    public function Del_Derivation_Suffixes($kata){
        foreach (['/(i|an)$/', '/(kan)$/'] as $suffix) {
            if (!preg_match($suffix, $kata)) {
                continue;
            }
            $candidate = preg_replace($suffix, '', $kata);
            if ($this->cekKamus($candidate)) {
                return $candidate;
            }
        }

        return $kata;
    }

    /**
     * Remove one derivation prefix (di-, ke-, se-, te-, be-, me-, pe- and the
     * compound forms diper-/memper-).
     *
     * For the detected prefix family a list of candidate roots is built, in
     * priority order. Each candidate is accepted when it is a dictionary root
     * either as-is or after removing a derivation suffix.
     *
     * @param string $kata Word to process.
     * @return string The dictionary root found, otherwise $kata unchanged.
     */
    public function Del_Derivation_Prefix($kata){
        $candidates = [];

        if (preg_match('/^(di|[ks]e)/', $kata)) {
            $candidates = $this->plainPrefixCandidates($kata);
        } elseif (preg_match('/^te/', $kata)) {
            $candidates = $this->tePrefixCandidates($kata);
        } elseif (preg_match('/^me/', $kata)) {
            $candidates = $this->mePrefixCandidates($kata);
        } elseif (preg_match('/^be/', $kata)) {
            $candidates = $this->bePrefixCandidates($kata);
        } elseif (preg_match('/^pe/', $kata)) {
            $candidates = $this->pePrefixCandidates($kata);
        }

        $stem = $this->firstStem($candidates);

        return $stem !== null ? $stem : $kata;
    }

    /**
     * Stem a single word.
     *
     * The dictionary is consulted after every stripping step and the first
     * root found is returned, so a root is never stripped any further
     * (e.g. "kepalanya" -> "kepala", not "pala").
     *
     * @param string $kata Word to stem.
     * @return string The root word, or the furthest-stripped form when no
     *                dictionary root could be found.
     */
    public function NAZIEF($kata){
        /* 1. Already a root word: done. */
        if ($this->cekKamus($kata)) {
            return $kata;
        }

        /* 2. Remove inflection suffixes (-lah, -kah, -pun, -ku, -mu, -nya). */
        $tanpaInfleksi = $this->Del_Inflection_Suffixes($kata);
        if ($tanpaInfleksi !== $kata && $this->cekKamus($tanpaInfleksi)) {
            return $tanpaInfleksi;
        }

        /* 3. Remove a derivation suffix (-i, -an, -kan). The method only
              changes the word when the result is a dictionary root. */
        $tanpaAkhiran = $this->Del_Derivation_Suffixes($tanpaInfleksi);
        if ($tanpaAkhiran !== $tanpaInfleksi) {
            return $tanpaAkhiran;
        }

        /* 4. Remove a derivation prefix. */
        return $this->Del_Derivation_Prefix($tanpaAkhiran);
    }

    /**
     * Run the text through StopWordRemover.
     *
     * @param string $data Text to filter.
     * @return mixed Whatever StopWordRemover::remove() returns.
     */
    public function _removekata($data){
        $stopWordRemover = new StopWordRemover();
        return $stopWordRemover->remove($data);
    }

    /**
     * Stem every word of a text, remove stop words and count frequencies.
     *
     * The text is split on every run of non-letters; each token is stemmed
     * and the stems are joined with single spaces (one trailing space).
     *
     * @param string $words Raw text.
     * @return mixed False for empty input, otherwise the result of
     *               frekuensi::countFrecuency() on the filtered stems.
     */
    public function stem_list( $words ){
        if (empty($words)) {
            return false;
        }

        // PREG_SPLIT_NO_EMPTY: leading/trailing punctuation must not produce
        // empty tokens, which would cost dictionary queries for nothing.
        $tokens = preg_split('/[^A-Za-z]+/', trim($words), -1, PREG_SPLIT_NO_EMPTY);

        $stemming = '';
        foreach ($tokens as $word) {
            $stemming .= $this->NAZIEF($word) . ' ';
        }

        $gethasil = $this->_removekata($stemming);

        return frekuensi::countFrecuency($gethasil);
    }

    /** Accept a candidate that is a root as-is or after derivation-suffix removal; null otherwise. */
    private function lookupStem($candidate){
        if ($this->cekKamus($candidate)) {
            return $candidate;
        }

        // Del_Derivation_Suffixes() only alters the word when the result is a
        // dictionary root, so a changed value needs no second lookup.
        $stripped = $this->Del_Derivation_Suffixes($candidate);

        return $stripped !== $candidate ? $stripped : null;
    }

    /** Return the stem of the first candidate that resolves to a root, or null when none does. */
    private function firstStem(array $candidates){
        // array_unique() keeps the first occurrence, so priority order is
        // preserved while duplicate candidates are not queried twice.
        foreach (array_unique($candidates) as $candidate) {
            $stem = $this->lookupStem($candidate);
            if ($stem !== null) {
                return $stem;
            }
        }

        return null;
    }

    /** Candidates for di-, ke-, se- and the compound diper- (including the form where the root keeps the "r"). */
    private function plainPrefixCandidates($kata){
        $list = [preg_replace('/^(di|[ks]e)/', '', $kata)];

        if (preg_match('/^diper/', $kata)) {
            $list[] = preg_replace('/^diper/', '', $kata);
            $list[] = preg_replace('/^diper/', 'r', $kata);
        }

        return $list;
    }

    /**
     * Candidates for te-.
     *
     *   "terr…"                     -> no prefix
     *   "ter" + any other character -> ter-  (plus te- + "r…" before a vowel)
     *   "te" + C + "er" + vowel     -> no prefix
     *   "te" + C + "er" + non-vowel -> te-
     */
    private function tePrefixCandidates($kata){
        if (preg_match('/^terr/', $kata)) {
            return [];
        }

        $list = [];
        if (preg_match('/^ter[^r]/', $kata)) {
            $list[] = preg_replace('/^ter/', '', $kata);
            if (preg_match('/^ter[aiueo]/', $kata)) {
                // The "r" may belong to the root: te- + rasa -> "terasa".
                $list[] = preg_replace('/^te/', '', $kata);
            }
        } elseif (preg_match('/^te[^aiueor]er[^aiueo]/', $kata)) {
            $list[] = preg_replace('/^te/', '', $kata);
        }

        return $list;
    }

    /**
     * Candidates for me- (meng-, meny-, mem-, men-, me-, memper-).
     *
     * Elided root consonants are restored: meng- -> k, meny- -> s,
     * men- -> t, mem- + vowel -> p.
     */
    private function mePrefixCandidates($kata){
        $list = [];

        if (preg_match('/^meng[aiueokghq]/', $kata)) {
            $list[] = preg_replace('/^meng/', '', $kata);
            $list[] = preg_replace('/^meng/', 'k', $kata);
        }
        if (preg_match('/^meny/', $kata)) {
            $list[] = preg_replace('/^meny/', 's', $kata);
        }
        if (preg_match('/^mem[bfpv]/', $kata)) {
            $list[] = preg_replace('/^mem/', '', $kata);
        }
        if (preg_match('/^men[cdjsz]/', $kata)) {
            $list[] = preg_replace('/^men/', '', $kata);
        }

        $list[] = preg_replace('/^me/', '', $kata);

        if (preg_match('/^men/', $kata)) {
            $list[] = preg_replace('/^men/', 't', $kata);
        }
        if (preg_match('/^mem[aiueo]/', $kata)) {
            // "p" is elided only before a vowel: mem- + pukul -> "memukul".
            $list[] = preg_replace('/^mem/', 'p', $kata);
        }
        if (preg_match('/^memper/', $kata)) {
            $list[] = preg_replace('/^memper/', '', $kata);
            $list[] = preg_replace('/^memper/', 'r', $kata);
        }

        return $list;
    }

    /** Candidates for be- (ber- before a vowel or consonant, be- before "k"). */
    private function bePrefixCandidates($kata){
        $list = [];

        if (preg_match('/^ber[aiueo]/', $kata)) {
            $list[] = preg_replace('/^ber/', '', $kata);
            // The "r" may belong to the root: be- + r…
            $list[] = preg_replace('/^ber/', 'r', $kata);
        }
        // Anchored on purpose: only the leading "ber" is a prefix.
        if (preg_match('/^ber[^aiueo]/', $kata)) {
            $list[] = preg_replace('/^ber/', '', $kata);
        }
        if (preg_match('/^bek/', $kata)) {
            $list[] = preg_replace('/^be/', '', $kata);
        }

        return $list;
    }

    /**
     * Candidates for pe- (peng-, peny-, pem-, pen-, per-, pe-).
     *
     * The restored-consonant forms for peng- (k), pen- (t) and pem- + vowel
     * (p) mirror the me- rules and are tried last.
     */
    private function pePrefixCandidates($kata){
        $list = [];

        if (preg_match('/^peng[aiueokghq]/', $kata)) {
            $list[] = preg_replace('/^peng/', '', $kata);
        }
        if (preg_match('/^peny/', $kata)) {
            $list[] = preg_replace('/^peny/', 's', $kata);
        }
        if (preg_match('/^pem[bfpv]/', $kata)) {
            $list[] = preg_replace('/^pem/', '', $kata);
        }
        if (preg_match('/^pen[cdjsz]/', $kata)) {
            $list[] = preg_replace('/^pen/', '', $kata);
        }
        if (preg_match('/^per/', $kata)) {
            $list[] = preg_replace('/^per/', '', $kata);
            $list[] = preg_replace('/^per/', 'r', $kata);
        }

        $list[] = preg_replace('/^pe/', '', $kata);

        if (preg_match('/^peng[aiueokghq]/', $kata)) {
            $list[] = preg_replace('/^peng/', 'k', $kata);
        }
        if (preg_match('/^pen/', $kata)) {
            $list[] = preg_replace('/^pen/', 't', $kata);
        }
        if (preg_match('/^pem[aiueo]/', $kata)) {
            $list[] = preg_replace('/^pem/', 'p', $kata);
        }

        return $list;
    }
}

ada source code nya tidak mas?
Reply | Delete
ada source code lengkapnya tidak mas ?
Reply | Delete
Bisa share source code lengkapnya ga ?
Reply | Delete
maaf mau tanya untuk bagian hapus derivation prefix di tiap pemenggalan imbuhan selalu tersisip code $__kata__ = $this->Del_Derivation_Suffixes($__kata); fungsinya apa ya ?
Reply | Delete