反正就是一整個好玩!!!(  ̄ c ̄)y▂ξ

目前分類:Ruby (28)

瀏覽方式: 標題列表 簡短摘要

http://www.ruby-forum.com/topic/182957

 

# Encode a Unicode code point as a UTF-8 character.
# 8364 (0x20AC) is the euro sign.
dec_num = 8364
# The "U" directive of Array#pack encodes each integer as a UTF-8 character.
str = [dec_num].pack("U")
puts str
 
php
http://stackoverflow.com/questions/3704538/php-construct-a-unicode-string 

 

#!/usr/bin/env ruby
# Convert a Unicode code point, given as an Integer, into the corresponding
# one-character UTF-8 string.
def decToStr(dec_num)
  [dec_num].pack("U")
end
# Print the character for the hexadecimal code point stored in the second
# comma-separated column of every row of ./uao.csv.
# File.foreach closes the file even if an error is raised part-way through.
File.foreach("./uao.csv") do |line|
  hex = line.split(',')[1]
  # Rows without a second column are skipped; previously they re-printed the
  # code point of the preceding row (or failed on the very first row).
  next if hex.nil?

  puts decToStr(hex.to_i(16))
end

 

#將md5加密的字利用uao列表比對還原,未優化

#!/usr/bin/env ruby
require 'iconv'
require 'digest/md5'

# Index every word of the UAO word list by its MD5 hex digest so each db row
# can be resolved with one hash lookup instead of a scan over the whole list.
# A digest maps to a list of words so duplicate words are still reported once
# per occurrence, in file order.
uao = Hash.new { |index, digest| index[digest] = [] }
File.foreach("./uaowords.big5") do |line|
  word = line.chomp
  uao[Digest::MD5.hexdigest(word)] << word
end

# Each db row is read as: first column, quoted MD5, then the remaining columns.
# Print the row again with the MD5 replaced by the matching word(s).
File.foreach("./db.csv.big5") do |line|
  fields = line.chomp.split(',')
  # Rows without an MD5 column cannot be matched; skip them instead of crashing.
  next if fields[1].nil?

  dbmd5 = fields[1].gsub('\'', '')
  pinyin = fields[2..-1].join(',')
  # fetch with a default avoids creating empty index entries for unknown digests.
  uao.fetch(dbmd5, []).each do |uaoword|
    printf("%s,\'%s\',%s\n", fields[0], uaoword, pinyin)
  end
end

igogo 發表在 痞客邦 留言(0) 人氣()

八取四不重覆,並且排列順序列入考慮(permutation)

#!/usr/bin/env ruby

require 'permutation'

str = "abcdefgh"
nums = 4  # how many characters to pick

# Array#permutation(nums) yields every ordered selection of `nums` characters
# directly, so there is no need to enumerate all full-length permutations and
# truncate them. `uniq` only matters when `str` contains repeated characters.
arr = str.chars.to_a.permutation(nums).map { |picked| picked.join }.sort.uniq

arr.each do |e|
 puts e
end

puts arr.uniq.size

 

 

 

八取四不重覆,並且排列順序不列入考慮(combination)

#!/usr/bin/env ruby

require 'permutation'

str = "12345678"
nums = 4  # how many characters to pick

# Array#combination(nums) yields every unordered selection of `nums`
# characters. Each selection is sorted so equal selections produce equal
# strings; `uniq` only matters when `str` contains repeated characters.
arr = str.chars.to_a.combination(nums).map { |picked| picked.sort.join }.sort.uniq

arr.each do |e|
 puts e
end

puts arr.size


 

 

參考:

http://dufu.math.ncu.edu.tw/calculus/calculus_eng/node208.html

igogo 發表在 痞客邦 留言(0) 人氣()

與指令paste的功用相同

#!/usr/bin/ruby

files = ["a.txt","b.txt","c.txt"]

# One array of cells per input file.
# NOTE(review): joining the lines with ',' and splitting again also breaks a
# line that itself contains commas into several cells; this is kept as-is --
# confirm whether it is intended.
raw_data = files.map do |name|
  File.readlines(name).map { |line| line.chomp }.join(',').split(',')
end

# Transpose once (the files must yield the same number of cells, otherwise
# Array#transpose raises IndexError) and print one tab-separated row per line.
raw_data.transpose.each do |row|
  puts row.join("\t")
end





igogo 發表在 痞客邦 留言(0) 人氣()

get mms file -> mp3


#!/usr/bin/ruby -Kw
require 'fileutils'
#mms_stream = ARGV[0]
class Mms
  # Dump an mms stream (or local file) to a wav file with mplayer, encode it
  # to mp3 with lame, and remove the intermediate wav file.
  #
  # mms_stream - URL or path of the stream; the output files are named after
  #              its basename with the extension replaced by .wav / .mp3 and
  #              are written to the current directory.
  #
  # Returns the result of the last `system` call that ran: true when both
  # tools succeeded, false or nil otherwise.
  def self.FetchMms(mms_stream)
    source = mms_stream.to_s
    base = File.basename(source, ".*")
    wav_file = "#{base}.wav"
    mp3_file = "#{base}.mp3"

    begin
      # The multi-argument form of system bypasses the shell, so characters
      # such as spaces, quotes or ';' in the URL/file name are passed verbatim
      # instead of being interpreted as shell syntax.
      system("/usr/bin/mplayer", "-ao", "pcm:file=#{wav_file}", source) &&
        system("/usr/bin/lame", "-b", "42", wav_file, mp3_file)
    ensure
      # Always clean up the intermediate wav file; rm_f tolerates the case
      # where mplayer never created it.
      FileUtils.rm_f(wav_file)
    end
  end
end

# Command-line entry point: convert the stream named by the first argument,
# or print a usage hint when no argument was given.
stream = ARGV[0]
if stream
  Mms.FetchMms(stream)
else
  puts "usage: mms url or file"
end

 

use ffmpeg

# Convert every .wma file in the current directory to a 192 kbit/s, stereo,
# 44.1 kHz mp3 with the same base name.
ext = ".wma"
Dir.glob("*#{ext}").each do |f|
  # Strip only the trailing extension (gsub would also rewrite ".wma"
  # occurring elsewhere in the name).
  m = "#{File.basename(f, ext)}.mp3"
  # Multi-argument system bypasses the shell, so file names containing quotes
  # or spaces are passed to ffmpeg unchanged.
  system("ffmpeg", "-i", f, "-ab", "192k", "-ac", "2", "-ar", "44100", m)
end

igogo 發表在 痞客邦 留言(0) 人氣()

 

我真是手賤,

在這時刻還隨便按了個emerge --update ruby,

結果真是靠北了....gem19就不能用,

一直出現 /usr/bin/ruby: no such file to load -- auto_gem (LoadError)

金北七...仔細google 原來ruby19已經由layman 移到portage了 Orz

升級步驟如下:

echo "-ruby_targets_ruby19" >> /etc/portage/profile/use.mask

emerge --update rubygems

 

在這種關鍵時刻手殘...只能說自作自受

igogo 發表在 痞客邦 留言(0) 人氣()

ruby with sqlite3

#!/usr/bin/ruby -Ku
# encoding: utf-8
require 'amalgalite'
require 'fileutils'

class String
  @@tmp_db = "tmp_db.txt"
  @@gsp_db = "gsp_corpus.db"
  @@gsp_corpus = "gsp_corpus" # table name
  @@field_name = "word source" # space-separated column names used by save_to_db

  # Look the receiver up in the `word` column of gsp_corpus.
  #
  # When at least one row matches, the matching rows are joined with commas,
  # written to the temporary file named by @@tmp_db (which is recreated on
  # every call) and the file content is returned. Returns nil when nothing
  # matches.
  def to_db
    FileUtils.rm(@@tmp_db) if File.exist?(@@tmp_db) # start with a clean slate
    my_db = Amalgalite::Database.new(@@gsp_db)
    begin
      rows = my_db.execute("select * from gsp_corpus where word = ?", to_s)
    ensure
      # Close the database even when the query raises.
      my_db.close
    end
    return unless rows.any?

    File.open("./#{@@tmp_db}", "a") { |txt| txt.puts rows.join(',') }
    # File.read closes its handle, unlike File.open(...).read.
    File.read("./#{@@tmp_db}")
  end

  # Treat the receiver as a comma-separated record whose first field is the
  # word, and insert it into gsp_corpus unless that word is already stored.
  # The number of fields is expected to match the columns in @@field_name.
  def save_to_db
    values = split(',')
    my_db = Amalgalite::Database.new(@@gsp_db)
    begin
      existing = my_db.execute("select * from gsp_corpus where word = ?", values[0].to_s)
      return if existing.any?

      # Bind the values instead of pasting them into the SQL text, so quotes
      # inside a field cannot break or alter the statement.
      placeholders = Array.new(values.size, '?').join(',')
      insert_stmt = "insert into gsp_corpus (%s) values (%s)" % [@@field_name.split.join(','), placeholders]
      my_db.execute(insert_stmt, *values)
      puts "insert into sqlite3_db sucessfully"
    ensure
      my_db.close
    end
  end
end





igogo 發表在 痞客邦 留言(0) 人氣()

raw tf csv file

a,1,2,3,4

b,1,2,3,4

c,1,2,3,4

 

fname=ARGV[0]

# For every row of the CSV file print its first character followed by one
# 0/1 flag per field from the third field onwards (1 when the field parses to
# a number greater than zero, 0 otherwise).
# NOTE(review): the label is the first character of the line, not the whole
# first field, and the second field is not emitted -- confirm both are
# intended.
File.foreach("#{fname}") do |line|
  cells = line.split(',')[2..-1]
  # Blank or single-field lines have nothing to convert; skip them instead of
  # failing on nil.
  next if cells.nil?

  flags = cells.map { |x| x.to_f > 0 ? 1 : 0 }
  puts [line[0].to_s, flags].join(',')
end


igogo 發表在 痞客邦 留言(0) 人氣()

求一超球体的圓心與半徑


#!/usr/bin/ruby -Ku
require 'ai4r'
require 'csv'

# Euclidean distance between two points given as equally sized arrays of
# coordinates. Returns a Float.
def EuclideanDistance(x,y)
  squared = x.each_index.inject(0) { |acc, i| acc + (x[i] - y[i])**2.0 }
  Math.sqrt(squared)
end

csv_file = "tfidf-ckip-a.csv"

# One data point per line: every comma-separated field becomes a Float.
# File.foreach closes the file when the iteration ends.
data = File.foreach("#{csv_file}").map do |line|
  line.split(',').map { |s| s.to_f }
end

# A k-means run with a single cluster is used to obtain the centroid of all
# points, which serves as the centre of the sphere.
ai4r_data = Ai4r::Data::DataSet.new(:data_items=> data)
kmeans = Ai4r::Clusterers::KMeans.new
res = kmeans.build(ai4r_data,1)
center = res.centroids[0]

# The radius is the largest distance from the centre to any data point.
printf("計算中...\n")
r = data.inject(0) do |farthest, point|
  dist = EuclideanDistance(center, point)
  dist > farthest ? dist : farthest
end

printf("球体,%s\n",csv_file)
printf("球体半徑為,%f\n",r)
printf("球体圓心為,%s\n",center.join(','))



igogo 發表在 痞客邦 留言(0) 人氣()


#!/usr/bin/ruby -Ku
# encoding: utf-8
require 'amalgalite'
require 'fileutils'

# Loads a CSV file into a freshly created SQLite database.
class Csv_to_db
  # Create "<db_name>.db" (replacing any existing file) with a single table
  # ctr_map whose columns are named after the CSV header row, then insert
  # every following CSV row.
  #
  # db_name  - database file name without the ".db" extension.
  # csv_name - path of the UTF-8 CSV file; its first row holds the column names.
  #
  # Raises ArgumentError when the CSV file has no header row.
  #
  # NOTE(review): each data row is pasted verbatim into the VALUES clause, so
  # the CSV fields must already be valid SQL literals (e.g. quoted strings)
  # and the file must come from a trusted source.
  def initialize(db_name, csv_name)
    db_file = "#{db_name}.db"
    FileUtils.rm(db_file) if File.exist?(db_file)  # start with a clean slate
    csv_rows = File.read("#{csv_name}", :encoding => "utf-8").split("\n")
    raise ArgumentError, "#{csv_name} has no header row" if csv_rows.empty?

    col_names = csv_rows[0].split(',')
    col_defs = col_names.map { |col| col + ' char(1)' }

    my_db = Amalgalite::Database.new(db_file)
    begin
      my_db.execute("create table ctr_map(%s)" % col_defs.join(', '))
      csv_rows[1..-1].each do |row|
        # A blank line would produce an INSERT without values.
        next if row.strip.empty?

        my_db.execute("insert into ctr_map (%s) values (%s)" % [col_names.join(', '), row])
      end
    ensure
      # Close the database even when a statement fails.
      my_db.close
    end
    puts "ok"
  end
end

db_name = "ctr_map"
csv_name = "chinese_map.csv"
Csv_to_db.new(db_name,csv_name)

lala

insert into sqlite3 db

  def save_to_db
    str = self.gsub(/,/,"\",\"").gsub(/^/,"\"").gsub(/$/,"\"")
    my_db=Amalgalite::Database.new(@@gsp_db)
    stmt = my_db.execute("select * from gsp_corpus where word = ?", "#{self.split(',')[0]}")
  if !(stmt.any?)
   insert_stmt = "insert into gsp_corpus (%s) values (%s)" % [@@field_name.split.join(','), str]
  end
    my_db.execute(insert_stmt)
    my_db.close
  end


#str = "教育部,gs"

igogo 發表在 痞客邦 留言(0) 人氣()

  • 這是一篇加密文章,請輸入密碼
  • 密碼提示:西江小館
  • 請輸入密碼:


#!/usr/bin/ruby -Ku
# encoding: UTF-8
# Stop-word removal.
class String
  # Return a copy of the receiver in which every occurrence of each stop word
  # is replaced by a comma.
  #
  # stop_words - enumerable of words to replace. Empty entries are ignored:
  #              replacing the empty string would insert a comma between
  #              every character.
  def to_rm_stop_words(stop_words)
    stop_words.inject(self) do |msg, word|
      pattern = word.to_s
      pattern.empty? ? msg : msg.gsub(pattern, ',')
    end
  end
end


stop_words_db = "stop_words.csv" # stop words list, one word per line
dst_dir = "dst_dir" # directory that receives the processed files
file_array = Dir.glob("*.txt")

# Blank lines in the list are dropped: an empty stop word would match between
# every pair of characters.
stop_words = File.readlines("#{stop_words_db}").map { |line| line.strip }.reject { |word| word.empty? }

# Make sure the output directory exists before writing into it.
Dir.mkdir(dst_dir) unless File.directory?(dst_dir)

file_array.each do |each_file|
  # Concatenate the stripped lines of the file into one string.
  msg = File.readlines("#{each_file}").map { |line| line.strip }.join
  # Replace the stop words by commas, then collapse runs of commas.
  revised_msg = msg.to_rm_stop_words(stop_words).gsub(/,+/, ",")

  # NOTE(review): append mode means re-running the script adds the result to
  # an existing output file again -- confirm this is intended.
  File.open("./#{dst_dir}/#{each_file}","a") do |content|
    content.puts revised_msg
  end
end




igogo 發表在 痞客邦 留言(0) 人氣()

# Character-based (multi-byte safe) substring helpers.
class String
  # Substring by character position. Accepts the same arguments as Array#[]:
  # (start, length), a Range, or a single index.
  # Returns "" when the requested slice lies outside the string.
  def mbsubstr1(*range)
    # Array() normalises the three possible results of Array#[] (an array of
    # characters, a single character, or nil) before joining.
    Array(split(//)[*range]).join("")
  end

  # Substring of at most `len` characters starting at character index `idx`
  # (both non-negative integers). Returns nil when the string has fewer than
  # `idx` characters.
  def mbsubstr2(idx, len)
    # /m lets "." match newlines too; Integer() ensures only numbers are
    # interpolated into the pattern.
    slice(/\A.{#{Integer(idx)}}(.{0,#{Integer(len)}})/m, 1)
  end
end

p "一大串字一大串字".mbsubstr1(2,3)

p "一大串字一大串字".mbsubstr1(2..3)

p "一大串字一大串字".mbsubstr2(2,2)

Maximum Matching Segmentation (MMSeg)

# Search `str` for the dictionary entry `term`, trying the longest candidate
# first at every start position (maximum matching).
#
# str  - the text to scan.
# term - the dictionary word to look for.
#
# Every candidate substring is printed as it is tried. On success
# "I found <term>" is printed and the matched substring is returned;
# otherwise "no match" is printed and nil is returned.
def mmseg(str,term)
  text = str.to_s
  (0...text.length).each do |start|
    # String#[] with a range is character based, so multi-byte text is
    # handled without a separate substring helper.
    (text.length - 1).downto(start) do |stop|
      candidate = text[start..stop]
      puts candidate

      if candidate == term
        print "I found #{candidate}"
        return candidate
      end
    end
  end
  puts "no match"
  nil
end

 

 

igogo 發表在 痞客邦 留言(0) 人氣()

 

使用lambda 計算 Euclidean distance

若p(1,1), q(3,3)

則 d(p,q)

 ->(xs,ys,xt,yt){Math.sqrt((xs-xt)**2+(ys-yt)**2)}.(1,1,3,3)

 

lambda function

 f = { 1 => ->(x) { puts "#{x}  from 1" },

         2 => ->(x) { puts "#{x} bye from 2" }

      }

f.call[1]("mary")

 

HyperSphere 中,求任兩點的距離(EuclideanDistance)

# Print every coordinate pair of the two points, then print the Euclidean
# distance between them. Returns nil (the result of puts).
def EuclideanDistance(x,y)
  squared = 0
  x.each_index do |i|
    printf("x[%d]=%d,y[%d]=%d\n",i,x[i],i,y[i])
    squared += (x[i] - y[i])**2
  end
  puts Math.sqrt(squared)
end

# Sample point pairs in 2, 3 and 4 dimensions.
x, y = [1,1], [3,3]
p, q = [1,1,1], [3,3,3]
a, b = [1,1,1,1], [3,3,3,3]

# Print the distance for each pair in turn.
[[x, y], [p, q], [a, b]].each do |from, to|
  EuclideanDistance(from, to)
end

 

igogo 發表在 痞客邦 留言(0) 人氣()

n類中,取任兩類別算相似度, 第一步,先把要算的組合列出來

n = 4

# Every unordered pair (i, j) with 1 <= i < j <= n, in ascending order.
# The bound follows n; the outer loop used to be hard-coded to 4.
pairs = (1..n).to_a.combination(2).to_a
pairs.each { |i, j| printf("%d,%d\n", i, j) }

igogo 發表在 痞客邦 留言(0) 人氣()

參考網址:

 A Comprehensive Comparative Study on Term Weighting Schemes for Text Categorization with SVM

Proposing a New Term Weighting Scheme for Text Categorization

Supervised and Traditional Term Weighting Methods for Automatic Text Categorization

igogo 發表在 痞客邦 留言(0) 人氣()

require 'rexml/document'
require 'net/http'
require 'cgi'
require 'iconv'

class String
  # Ask the Google suggest endpoint for completions of the receiver.
  #
  # The raw response is also written to suggest.xml in the current directory.
  # Returns an Array of the distinct whitespace-separated words found in the
  # `data` attributes of the suggestions (empty when there are none).
  def to_googlesuggest
    http  = Net::HTTP.new('www.google.com', 80)
    # The search term goes in the `qu` parameter; a doubled "=" used to make
    # the submitted value start with a stray "=".
    query = "/complete/search?output=toolbar&hl=zh-TW&js=true&qu=#{CGI::escape(self)}"
    res   = http.request(Net::HTTP::Get.new(query)).body

    File.open('suggest.xml','w') { |f| f.write(res.to_s) }

    # The UTF-8 -> UTF-8//Ignore conversion drops byte sequences that are not
    # valid UTF-8 before the XML is parsed.
    doc = REXML::Document.new Iconv.conv("UTF-8//Ignore", 'UTF-8', res)
    suggested_words = doc.get_elements('//toplevel/CompleteSuggestion').map do |e|
      e.elements['suggestion'].attributes['data'].to_s.split(/\s+/)
    end
    # Use the non-destructive forms: flatten! and uniq! return nil when they
    # change nothing, which made the method return nil (or fail) for results
    # without duplicates.
    suggested_words.flatten.uniq
  end
end


#參考

#http://kwappa.txt-nifty.com/blog/2009/10/google-suggest-.html


#http://zh.wikipedia.org/w/api.php?action=opensearch&limit=10&format=xml&search=tf-idf

igogo 發表在 痞客邦 留言(0) 人氣()

檔案茫茫如海,怎麼找到想要的檔?

 


# List the .txt files in the current directory that contain "xml".
farr = Dir["*.txt"]

farr.each do |name|
  found = File.open(name) do |file|
    file.each_line.any? { |words| words =~ /xml/ }
  end
  # One name per line, printed once per file; the name used to be printed
  # once per matching line with no separator between names.
  puts name if found
end

igogo 發表在 痞客邦 留言(0) 人氣()

矩陣轉置

#!/usr/bin/ruby -Kuw

# Transpose the comma-separated matrix stored in the file named by the first
# command-line argument and print it, one row per line.
fname = ARGV[0]
arr = File.readlines("#{fname}").map { |line| line.chomp.split(',') }

tarr = arr.transpose

tarr.each do |row|
  puts row.join(',')
end




igogo 發表在 痞客邦 留言(0) 人氣()

 

#!/usr/X11R6/bin/ruby -wKu
require 'csv'

fname="simple.csv"

# Print every row without its second field: the first field followed by the
# third through last fields.
rows = CSV.parse(File.read("#{fname}"))
kept = rows.map do |row|
  first, _second, *rest = row
  [first, *rest].join(',')
end
puts kept

 

 

第一行移到最後一行


require 'csv'

fname=ARGV[0]

f=File.read("#{fname}")
# Move the first field of every row to the end. Array#rotate also copes with
# the empty rows CSV.parse yields for blank lines, which used to raise
# NoMethodError.
puts CSV.parse(f).map{ |x|
  x.rotate(1).join(',')
}

 

 

 

lala

csv 欄位字串變為數值

 "1,2,3".split(",").collect{ |s| s.to_i }
=> [1, 2, 3]

 

igogo 發表在 痞客邦 留言(0) 人氣()

a = [1, 2, 3, 4]

# Squares of every element: b = [1, 4, 9, 16]
b = a.collect { |x| x**2 }

# Even elements only: c = [2, 4]
c = a.reject { |x| x.odd? }

# Sum of squares: d = 30
d = a.inject(0) { |sum, x| sum + x**2 }

 

igogo 發表在 痞客邦 留言(0) 人氣()

1 2