#!/usr/bin/ruby -Ku
# encoding: UTF-8

# Stop-word removal helper added to String.
class String
  # Replaces every occurrence of each stop word with a comma.
  #
  # Stop words are applied one after another, in the order given, and are
  # matched as literal substrings (not as regular expressions). The receiver
  # is never modified.
  #
  # @param stop_words [Enumerable<#to_s>] words to strip out; each entry is
  #   converted with +to_s+ before matching, and empty entries are ignored
  # @return [String] the text with every stop word replaced by ","; the
  #   receiver itself is returned when no replacement was attempted
  def to_rm_stop_words(stop_words)
    stop_words.reduce(self) do |text, word|
      pattern = word.to_s
      # An empty pattern matches between every character, so gsub would
      # scatter commas through the whole text; skip such entries.
      pattern.empty? ? text : text.gsub(pattern, ',')
    end
  end
end
stop_words_db = "stop_words.csv" # stop words list, one entry per line
dst_dir = "dst_dir"              # directory that receives the processed files
file_array = Dir.glob("*.txt")

# Load the stop words, trimming surrounding whitespace. Blank lines are
# dropped: an empty stop word would match between every character of the
# text and flood the output with commas.
stop_words = File.foreach(stop_words_db).map(&:strip).reject(&:empty?)

# Create the output directory up front so the writes below do not fail with
# Errno::ENOENT on a fresh working directory.
Dir.mkdir(dst_dir) unless File.directory?(dst_dir)

file_array.each do |each_file|
  # Concatenate all lines into one string, trimming whitespace (including the
  # line terminator) from each line; no separator is inserted between lines.
  msg = File.foreach(each_file).map(&:strip).join

  # Replace stop words with commas, then collapse runs of commas into one.
  revised_msg = msg.to_rm_stop_words(stop_words).squeeze(",")

  # NOTE(review): the file is opened in append mode, so running the script
  # again adds another copy of the result to an existing output file.
  # Confirm whether overwrite ("w") is what is actually wanted.
  File.open("./#{dst_dir}/#{each_file}", "a") do |content|
    content.puts revised_msg
  end
end
|