# (web page residue, not part of the script) close


#!/usr/bin/ruby -Ku
# encoding: UTF-8
# Remove stop words from a string.
class String
  # Replaces every occurrence of each stop word with a comma.
  #
  # Stop words are applied one after another, in the order given, so an
  # earlier replacement can change what a later stop word matches.
  # Each entry is converted with +to_s+; entries that end up empty (for
  # example +""+ or +nil+) are skipped, because substituting an empty
  # pattern would insert a comma between every character of the text.
  #
  # @param stop_words [Enumerable<#to_s>] words to replace
  # @return [String] the text with stop words replaced by ","; the receiver
  #   is never mutated and is returned as-is when nothing is applied
  def to_rm_stop_words(stop_words)
    stop_words.reduce(self) do |text, stop_word|
      pattern = stop_word.to_s
      pattern.empty? ? text : text.gsub(pattern, ',')
    end
  end
end


stop_words_db = "stop_words.csv" # stop-word list, one entry per line
dst_dir = "dst_dir" # directory that receives the processed files
file_array = Dir.glob("*.txt")

# The output directory must exist before files can be opened inside it;
# otherwise File.open raises Errno::ENOENT.
Dir.mkdir(dst_dir) unless File.directory?(dst_dir)

# Load the stop words, trimming surrounding whitespace from each line.
# Blank lines are dropped: an empty stop word would make gsub insert a
# comma between every character of the text.
stop_words = File.readlines(stop_words_db).map(&:strip).reject(&:empty?)

file_array.each do |each_file|
  # Concatenate all lines of the file (each stripped of surrounding
  # whitespace) into one string, with no separator between lines.
  msg = File.readlines(each_file).map(&:strip).join

  # Replace stop words with commas, then collapse runs of commas into one.
  revised_msg = msg.to_rm_stop_words(stop_words).gsub(/,+/, ",")

  # Append mode: running the script again adds another line to an existing
  # output file instead of overwriting it.
  File.open(File.join(dst_dir, each_file), "a") do |content|
    content.puts revised_msg
  end
end




# (web page residue, not part of the script)
# arrow
# arrow
# 全站熱搜
#
# igogo 發表在 痞客邦 留言(0) 人氣()