#!/usr/bin/ruby -Ku
# encoding: utf-8
require 'amalgalite'
require 'fileutils'

# Loads a comma-separated text file into a freshly created SQLite database.
#
# The first line of the file supplies the column names; every following
# non-blank line is inserted as one row of the `ctr_map` table.
class Csv_to_db
  # Builds "<db_name>.db" from the file at csv_name and prints "ok" when done.
  #
  # db_name  - database file name without the ".db" extension.
  # csv_name - path of the UTF-8 encoded CSV file to import.
  #
  # Each data line is spliced into its INSERT statement verbatim, so the
  # values must already be valid SQL literals (numbers or quoted strings).
  #
  # Raises ArgumentError when the CSV file contains no header line.
  def initialize(db_name, csv_name)
    db_file = "#{db_name}.db"

    # Read and validate the input first so an unreadable or empty CSV does
    # not destroy an existing database.
    csv_rows = File.read(csv_name.to_s, :encoding => "utf-8").split("\n")
    # Strip the CR of CRLF files and drop blank lines; both would otherwise
    # end up inside malformed SQL.
    csv_rows = csv_rows.map { |line| line.chomp("\r") }.reject { |line| line.strip.empty? }
    raise ArgumentError, "#{csv_name} has no header line" if csv_rows.empty?

    header, *data_rows = csv_rows
    col_names = header.split(',')
    column_list = col_names.join(', ')
    column_defs = col_names.map { |col| "#{col} char(1)" }.join(', ')

    FileUtils.rm(db_file) if File.exist?(db_file) # start with a clean slate
    my_db = Amalgalite::Database.new(db_file)
    begin
      my_db.execute("create table ctr_map(#{column_defs})")
      data_rows.each do |row|
        my_db.execute("insert into ctr_map (#{column_list}) values (#{row})")
      end
    ensure
      # Release the handle even when one of the statements fails.
      my_db.close
    end
    puts "ok"
  end
end

# Import chinese_map.csv into ctr_map.db.
db_name, csv_name = "ctr_map", "chinese_map.csv"
Csv_to_db.new(db_name, csv_name)

lala

Insert into sqlite3 db

# Stores the receiver, a comma-separated record whose first field is the
# word, as one row of gsp_corpus unless a row for that word already exists.
#
# Reads @@gsp_db (passed to Amalgalite::Database.new) and @@field_name
# (whitespace-separated column names); both are defined outside this snippet.
def save_to_db
  fields = split(',', -1)        # -1 keeps trailing empty fields
  fields = [''] if fields.empty? # an empty record still carries one empty value

  my_db = Amalgalite::Database.new(@@gsp_db)
  begin
    existing = my_db.execute("select * from gsp_corpus where word = ?", fields.first)
    unless existing.any?
      # Bind the values instead of splicing them into the SQL text: a value
      # containing a quote, or one equal to a column name, would otherwise
      # be misread by the SQL parser.
      placeholders = (['?'] * fields.size).join(',')
      insert_stmt = "insert into gsp_corpus (%s) values (%s)" % [@@field_name.split.join(','), placeholders]
      my_db.execute(insert_stmt, *fields)
    end
  ensure
    # Close the handle on every path, including a failed statement.
    my_db.close
  end
end

#str = "教育部,gs"

igogo 發表在痞客邦留言(0) 人氣()

個人分類：Ruby

▲top

Jan 27 Wed 2010 12:35
[ruby]MMSeg

▲top

Jan 25 Mon 2010 18:10
[ruby] stop words elimination

#!/usr/bin/ruby -Ku
# encoding: UTF-8
# Adds stop-word removal to String.
class String
  # Returns the text with every occurrence of each stop word replaced by a
  # comma. Stop words are applied one after another, in the given order.
  #
  # stop_words - an Enumerable of stop words; nil and empty entries are
  #              ignored.
  #
  # Returns the receiver itself when nothing needs replacing, otherwise a
  # new String.
  def to_rm_stop_words(stop_words)
    stop_words.inject(self) do |msg, entry|
      word = entry.to_s
      # gsub("") matches between every character and would flood the text
      # with commas, so blank entries are skipped.
      word.empty? ? msg : msg.gsub(word, ',')
    end
  end
end

stop_words_db = "stop_words.csv" # stop words list, one entry per line
dst_dir = "dst_dir" # directory that receives the processed files
file_array = Dir.glob("*.txt")

# Load the stop words, ignoring blank lines: an empty stop word would make
# gsub insert a comma between every character of the text.
stop_words = File.readlines(stop_words_db).map(&:strip).reject(&:empty?)

# File.open below cannot create missing directories, so make sure it exists.
Dir.mkdir(dst_dir) unless File.directory?(dst_dir)

file_array.each do |each_file|
  # Join all lines of the file into one string, trimming each line.
  msg = File.readlines(each_file).map(&:strip).join

  # Replace stop words with commas, then squeeze runs of commas into one.
  revised_msg = msg.to_rm_stop_words(stop_words).gsub(/,+/, ",")

  # Append mode: re-running the script adds to an existing output file.
  File.open(File.join(".", dst_dir, each_file), "a") do |content|
    content.puts revised_msg
  end
end

igogo 發表在痞客邦留言(0) 人氣()

個人分類：Ruby

▲top

Dec 31 Thu 2009 08:23
[ruby]取出字串的一部份

# Character-based (multibyte-safe) substring helpers.
class String
  # Returns the characters selected by range, which takes the same arguments
  # as Array#[]: a single index, a start and a length, or a Range.
  #
  # Returns "" when the selection lies outside the string.
  def mbsubstr1(*range)
    # Array() covers the single-index form (one String) and the out-of-range
    # form (nil); calling join directly on either raised NoMethodError.
    Array(split(//)[*range]).join("")
  end

  # Returns up to len characters starting at character offset idx, or nil
  # when the string has fewer than idx characters.
  def mbsubstr2(idx, len)
    # The capture group holds the wanted substring; /m lets "." match "\n".
    slice(/\A.{#{idx}}(.{0,#{len}})/m, 1)
  end
end

p "一大串字一大串字".mbsubstr1(2,3)

p "一大串字一大串字".mbsubstr1(2..3)

p "一大串字一大串字".mbsubstr2(2,2)

Maximum Matching Segmentation (MMSeg)

# Maximum-matching search: looks for term inside str by trying, for every
# start position from left to right, the longest candidate first and then
# ever shorter ones.
#
# str   - the text to scan.
# term  - a word from the dictionary.
# delay - seconds to pause before printing each candidate (default 0.1,
#         the pace of the original trace); pass 0 to disable.
#
# Prints every candidate tried, then "I found <term>" on success or
# "no match" once all candidates are exhausted.
#
# Returns the matched String, or nil when term does not occur in str.
def mmseg(str, term, delay: 0.1)
  remaining = str
  # Stops on an empty remainder, so empty and one-character inputs terminate
  # and the final one-character suffix is compared as well.
  until remaining.empty?
    (remaining.length - 1).downto(0) do |last|
      candidate = remaining[0..last] # String#[] is character-aware
      sleep delay if delay && delay > 0
      puts candidate

      if candidate == term
        print "I found #{candidate}"
        return candidate
      end
    end
    remaining = remaining[1..-1] # drop the first character and rescan
  end

  puts "no match"
  nil
end

igogo 發表在痞客邦留言(0) 人氣()

個人分類：Ruby

▲top

Dec 25 Fri 2009 18:04
[ruby] Euclidean distance

使用lambda 計算 Euclidean distance

若p(1,1), q(3,3)

則 d(p,q)

# Distance between (xs, ys) and (xt, yt), evaluated for p(1,1) and q(3,3).
lambda { |xs, ys, xt, yt| Math.sqrt((xs - xt)**2 + (ys - yt)**2) }.call(1, 1, 3, 3)

lambda function

# Dispatch table: maps an integer key to a one-argument lambda.
f = {
  1 => ->(x) { puts "#{x} from 1" },
  2 => ->(x) { puts "#{x} bye from 2" }
}

# Look the lambda up first, then invoke it; Hash itself has no #call.
f[1].call("mary")

在 HyperSphere 中，求任兩點的距離 (Euclidean distance)

# Prints each coordinate pair, then prints and returns the Euclidean
# distance between the points x and y.
#
# x, y - arrays of numeric coordinates with the same number of elements.
#
# Returns the distance as a Float.
# Raises ArgumentError when x and y differ in length.
def EuclideanDistance(x,y)
  unless x.size == y.size
    raise ArgumentError, "dimension mismatch: #{x.size} vs #{y.size}"
  end

  sum = 0
  x.each_with_index do |px, i|
    py = y[i]
    printf("x[%d]=%d,y[%d]=%d\n", i, px, i, py)
    sum += (px - py)**2
  end

  distance = Math.sqrt(sum)
  puts distance
  distance # return the value too, so callers can use it and not only read it
end

# Distance between (1,...,1) and (3,...,3) in 2, 3 and 4 dimensions.
[2, 3, 4].each do |dims|
  EuclideanDistance([1] * dims, [3] * dims)
end

igogo 發表在痞客邦留言(0) 人氣()

個人分類：Ruby

▲top

Dec 19 Sat 2009 08:41
[ruby] n類別中，任意取兩個類別比較

n類中，取任兩類別算相似度, 第一步，先把要算的組合列出來

n = 4

# Every unordered pair (i, j) with 1 <= i < j <= n, in ascending order.
# Both bounds now follow n; the outer loop used to be limited by a literal 4,
# so changing n alone gave wrong results.
pairs = (1...n).flat_map { |i| ((i + 1)..n).map { |j| [i, j] } }

pairs.each { |i, j| printf("%d,%d\n", i, j) }

igogo 發表在痞客邦留言(0) 人氣()

個人分類：Ruby

▲top

Dec 12 Sat 2009 18:43
[ruby] K means(cluster)

require 'ai4r'

# Three sample points wrapped in an ai4r data set.
points = [[1, 1, 1], [3, 3, 3], [9, 9, 9]]
data_set = Ai4r::Data::DataSet.new(:data_items => points)

# build(data_set, number_of_clusters)
clustering = Ai4r::Clusterers::KMeans.new.build(data_set, 2)

puts clustering.inspect

puts clustering.centroids.join(',')

Euclidean distance

http://en.wikipedia.org/wiki/Euclidean_space

http://en.wikipedia.org/wiki/Euclidean_distance

# Print the square root of the clusterer's distance between (1,1) and (3,3).
clusterer = Ai4r::Clusterers::KMeans.new
puts Math.sqrt(clusterer.distance([1, 1], [3, 3]))

都是幻覺，嚇不倒我的 ( ￣ c￣)y▂ξ

igogo 發表在痞客邦留言(0) 人氣()

個人分類：人工智慧應用

▲top

Dec 04 Fri 2009 23:53
[ruby] transform csv to libsvm format

#!/usr/bin/ruby -Ku
require 'rubygems'
require 'csv'

# Converts one parsed CSV row (label first, then the feature values) into a
# libsvm line: "label 1:v1 2:v2 ...". Feature indices start at 1.
def csv_row_to_libsvm(row)
  label, *features = row
  [label, *features.each_with_index.map { |value, i| "#{i + 1}:#{value}" }].join(' ')
end

f = ARGV[0]
if f.nil?
  # Without an argument the script used to fail with an obscure ENOENT.
  warn "usage: #{$0} FILE.csv"
else
  # CSV.foreach parses the file as a whole, so quoted fields may span lines.
  CSV.foreach(f) do |row|
    next if row.empty? # a blank line has no label and no features
    puts csv_row_to_libsvm(row)
  end
end

igogo 發表在痞客邦留言(0) 人氣()

個人分類：人工智慧應用

▲top

«	四月 2025					»
日	一	二	三	四	五	六
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30

牛大叔．生活隨筆

趴趴照

[ruby] csv to sqlite3

[ruby]MMSeg

[ruby] stop words elimination

[ruby]取出字串的一部份

[ruby] Euclidean distance

[ruby] n類別中，任意取兩個類別比較

[ruby] K means(cluster)

Euclidean distance

[ruby] transform csv to libsvm format

月曆

參觀人氣

相關連結

最新迴響

«	四月 2025					»
日	一	二	三	四	五	六
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30

«	四月 2025					»
日	一	二	三	四	五	六
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30

«	四月 2025					»
日	一	二	三	四	五	六
		1	2	3	4	5
6	7	8	9	10	11	12
13	14	15	16	17	18	19
20	21	22	23	24	25	26
27	28	29	30