# prime/prime-mixed.rb
# $Id: prime-mixed.rb,v 1.3 2005/03/07 07:51:32 komatsu Exp $
#
# Copyright (C) 2004 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.
#

module PrimeMixed
#   def initialize_prime_mixed ()
#     @prime_mixed_bigram = Hash.new(0)
#     filename = "/tmp/bigram-dict"
#     open(filename).each {|line|
#       (prev_char, char, score) = line.split("\t")
#       @prime_mixed_bigram[prev_char + "\t" + char] = score
#     }
#   end

  # Run the mixed conversion on the text currently held by the composer.
  #
  # Steps, in order:
  #   1. fetch the context with get_context,
  #   2. read the surface string from @composer.edit_get_surface_string,
  #   3. segment that string with process_data,
  #   4. look the segments up with lookup_mixed_concat_data_list,
  #   5. merge the hits with the context via PrimeWordList::merge_with_label.
  #
  # Returns whatever _adhoc_wordlist_to_conversionlist produces for the
  # merged word list.
  def convert_mixed
    context  = get_context
    surface  = @composer.edit_get_surface_string
    segments = process_data(surface)
    hits     = lookup_mixed_concat_data_list(segments)
    _adhoc_wordlist_to_conversionlist(
      PrimeWordList.merge_with_label(context, hits)
    )
  end

  # Mixed lookup for an arbitrary input string.
  #
  # string:: text to look up; it is first passed through
  #          PrimeTypeConv::convert and the resulting pieces are joined.
  # max::    accepted for interface compatibility; this method does not
  #          read it, so the number of results is not limited here.
  #
  # Returns the result of PrimeWordList::merge_with_label applied to
  # @context and the words found by lookup_mixed_concat_data_list.
  def lookup_mixed(string, max = 10)
    normalized = PrimeTypeConv.convert(string).join
    segments   = process_data(normalized)
    hits       = lookup_mixed_concat_data_list(segments)
    PrimeWordList.merge_with_label(@context, hits)
  end

  # Split +pattern+ into dictionary-backed segments, longest prefix first.
  #
  # Working left to right, the longest prefix of the remaining characters
  # is tried first, then shorter ones. For each prefix two lookups are made:
  #
  # * a PrimeQuery with method :exact; if any returned word has a non-nil
  #   literal the segment is [[prefix], literals]
  #   (presumably :exact matches on the reading -- confirm in PrimeQuery);
  # * otherwise a PrimeQuery with method :literal_exact; if any returned
  #   word has a non-nil pron the segment is [prons, [prefix]].
  #
  # When no prefix of any length hits, the first character alone becomes
  # the segment [[char], [char]].
  #
  # Returns an array of [prons, literals] pairs covering the whole pattern,
  # or [] for an empty pattern.
  def process_data(pattern)
    segments  = []
    remaining = pattern.split(//)

    until remaining.empty?
      segment = nil
      taken   = 1   # the single-character fallback consumes one character

      remaining.length.downto(1) do |length|
        head = remaining[0, length].join

        by_exact = search(PrimeQuery.new([head], nil, :exact))
        literals = by_exact.map { |word| word.literal }.compact
        unless literals.empty?
          segment = [[head], literals]
          taken   = length
          break
        end

        by_literal = search(PrimeQuery.new([head], nil, :literal_exact))
        prons      = by_literal.map { |word| word.pron }.compact
        unless prons.empty?
          segment = [prons, [head]]
          taken   = length
          break
        end
      end

      if segment.nil?
        single  = remaining[0, 1].join
        segment = [[single], [single]]
      end

      segments.push(segment)
      remaining = remaining[taken..-1]
    end

    segments
  end

  # Build the source text of a regexp describing the literal side of
  # +data_list+.
  #
  # data_list:: array of [prons, literals] pairs as built by process_data.
  #
  # For each pair:
  # * exactly one literal: its text is used, with every run of "-"
  #   characters turned into the wildcard ".+";
  # * any other number of literals: the whole segment becomes ".+".
  #
  # Literal text is passed through Regexp.escape so that characters such as
  # "(", "+", "." or "?" coming from user input or the dictionary are
  # matched verbatim instead of being read as regexp syntax (previously a
  # lone "(" produced a pattern that could not be compiled).
  #
  # Consecutive wildcards are collapsed into a single ".+".
  #
  # Returns a String; the caller adds anchors and compiles it.
  #
  # NOTE(review): the character class below contains only "-". It looks
  # like the remnant of a multibyte character range (e.g. a kana range)
  # whose bytes were lost in an encoding conversion -- confirm against the
  # upstream file before changing it.
  def lookup_mixed_combine_regexp(data_list)
    wildcard = ".+"

    fragments = data_list.map { |(_prons, literals)|
      if literals.length == 1
        # The -1 limit keeps leading/trailing empty pieces, so a "-" run at
        # either end of the literal still yields a wildcard there.
        literals[0].split(/[-]+/, -1).map { |text|
          Regexp.escape(text)
        }.join(wildcard)
      else
        wildcard
      end
    }

    # Escaped text can never contain a bare ".+" ("." and "+" are always
    # preceded by a backslash), so this only merges wildcards added above.
    fragments.join.gsub(/(\.\+)+/, wildcard)
  end

#   def lookup_mixed_combine_regexp2 (data_list)
#     regexp_pron    = ""
#     regexp_literal = ""

#     data_list.each {|(prons, literals)|
#       if prons.length == 1 then
#         regexp_pron += prons[0]
#       else
#         regexp_pron += "(" + prons.uniq.join("|") + ")"
#       end
#       if literals.length == 1 then
#         regexp_literal += literals[0]
#       else
#         regexp_literal += "(" + literals.uniq.join("|") + ")"
#       end
#     }
# #     p regexp_pron
# #     p regexp_literal
#   end

  # Entry point for looking up a segmented pattern.
  #
  # data_list:: array of [prons, literals] pairs as built by process_data.
  #
  # Delegates to lookup_mixed_concat_data_list_pron and returns its result
  # unchanged.
  def lookup_mixed_concat_data_list(data_list)
    return lookup_mixed_concat_data_list_pron(data_list)
  end

#   def lookup_mixed_concat_data_list_2 (data_list)
#     result_words = PrimeWordList.new()
#     (prons, literals) = data_list[0]
    
#     literals.each {|literal|
#       query = PrimeQuery.new([literal], nil, :literal_prefix)
#       words = search(query)
#       result_words += 
#         lookup_mixed_concat_data_list_2_internal(words,
#                                                  literal,
#                                                  data_list[1..-1])
#     }
#     return result_words
#   end

#   def lookup_mixed_concat_data_list_2_internal (words, prefix, data_list)
#     p words

#     result_words = PrimeWordList.new()

#     if data_list.empty? then
#       scoped_words = PrimeWordList.new()
#       words.each {|word|
#         if word.literal == prefix then
#           result_words.push(word)
#         end
#       }
#       return result_words
#     end

#     (prons, literals) = data_list[0]
#     literals.each {|literal|
#       next_prefix = prefix + literal
#       scoped_words = PrimeWordList.new()
#       words.each {|word|
#         if word.literal.index(next_prefix) == 0 then
#           scoped_words.push(word)
#         end
#       }
#       result_words +=
#         lookup_mixed_concat_data_list_2_internal(scoped_words,
#                                                  next_prefix,
#                                                  data_list[1..-1])
#     }
#     return result_words
#   end


#   def lookup_mixed_concat_data_list_literal (data_list)
#     data_cands = [""]
#     data_list.each {|(prons, literals)|
#       new_data_cands = []
#       literals.each {|literal|
#         data_cands.each {|data_cand|
#           new_data_cands.push( data_cand + literal )
#         }
#       }
#       data_cands = new_data_cands
#     }
#     query = PrimeQuery.new(data_cands, nil, :literal_exact)
#     words = search(query)
#     return words
#   end

  # Look up words by concatenated readings, then keep those whose literal
  # starts with the pattern implied by the literal side of +data_list+.
  #
  # data_list:: array of [prons, literals] pairs as built by process_data.
  #
  # 1. Every combination of one pron per segment is concatenated into a
  #    candidate string (the number of candidates is the product of the
  #    pron counts of all segments).
  # 2. All candidates are searched at once with a PrimeQuery using the
  #    :exact method.
  # 3. The hits are filtered: a word is kept only when its literal matches
  #    the source from lookup_mixed_combine_regexp anchored at the very
  #    start of the string. There is no end anchor, so this is a prefix
  #    match.
  #
  # Returns a new PrimeWordList with the surviving words, in search order.
  def lookup_mixed_concat_data_list_pron(data_list)
    readings = [""]
    data_list.each { |(prons, _literals)|
      # Ordered pron-major: every existing candidate gets the first pron,
      # then every existing candidate gets the second pron, and so on.
      readings = prons.map { |pron|
        readings.map { |reading| reading + pron }
      }.flatten
    }

    query      = PrimeQuery.new(readings, nil, :exact)
    candidates = search(query)

    # \A anchors at the start of the string; "^" would also match right
    # after a newline embedded in a literal.
    literal_source = "\\A" + lookup_mixed_combine_regexp(data_list)
    # Compiled once, on first use, instead of once per candidate word.
    # Compiling lazily means nothing is compiled when there are no
    # candidates, exactly as before.
    literal_regexp = nil

    words = PrimeWordList.new()
    candidates.each { |word|
      literal_regexp ||= Regexp.new(literal_source)
      if word.literal =~ literal_regexp then
        words.push(word)
      end
    }
    return words
  end

#   def lookup_mixed_concat_data_list (data_list)
#     data_cands = [""]
#     data_list.each {|(prons, literals)|
#       new_data_cands = []

#       if literals.length > 1 and data_cands.length > 1 then
#         # Cutting off the data_cands
#         data_cands.each {|data_cand|
#           query = PrimeQuery.new([data_cand], nil, :literal_prefix)
#           words = search(query)
#           if words.length > 0 then
#             new_data_cands.push(data_cand)
#           end
#         }
#         data_cands = new_data_cands
#         new_data_cands = []
#       end

#       literals.each {|literal|
#         data_cands.each {|data_cand|
#           new_data_cands.push( data_cand + literal )
#         }
#       }
#       data_cands = new_data_cands
#     }
#     query = PrimeQuery.new(data_cands, nil, :literal_exact)
#     words = search(query)
#     return words
#   end

#   def lookup_mixed_concat_data_list3 (data_list)  # (original non-ASCII note lost to mis-encoding)
#     data_cands = [""]
#     data_list.each {|(prons, literals)|
#       new_data_cands = []
#       prons.each {|pron|
#         data_cands.each {|data_cand|
#           new_data_cands.push( data_cand + pron )
#         }
#       }
#       data_cands = new_data_cands
#     }
#     query = PrimeQuery.new(data_cands, nil, :exact)
#     words = search(query)
#     return words
#   end

#   def taiyaki_1 (word_cands, prefix_literal, data_list)
#     if data_list.length == 0 then
#       return word_cands
#     end

#     (prons, literals) = data_list[0]

#     ## FIXME: There is better matching algorithm.
#     ## FIXME: Hiroyuki Komatsu <komatsu@taiyaki.org>
#     literals.each {|literal|
#       new_word_cands = []
#       new_prefix_literal = prefix_literal + literal

#       # !!!
#       word_cands.clone()

#       word_cands.each {|word|
#         if word.literal.index(new_prefix_literal) == 0 then
#           # The prefix of the word is the new_prefix_literal
#           new_word_cands.push(word)
#         end
#       }
#       taiyaki_1(new_word_cands, new_prefix_literal, data_list[1..-1])
#     }

#     return taiyaki_1 (new_word_cands, 
#     (pron
#     word_cands
#   end

#   def lookup_mixed_concat_data_list4 (data_list)
#     (prons, literals) = data_list[0]
#     data_cands = literals

#     data_list[0..-1].each {|(prons, literals)|
#       query = PrimeQuery.new(data_cands, nil, :literal_prefix)
#       cand_words = search(query)

#       cand_words.each {|word|
#         word.literal

#       new_data_cands = []
#       literals.each {|literal|
#         data_cands.each {|data_cand|
#           new_data_cands.push( data_cand + literal )
#         }
#       }
#       data_cands = new_data_cands
#     }
#     query = PrimeQuery.new(data_cands, nil, :literal_exact)
#     words = search(query)
#     return words
#   end

#   def lookup_mixed_concat_data_list2 (data_list)
#     data = [["", ""]]
#     last_data = data_list.pop()
#     data_list.each {|data2|
#       tmp_data = []
#       data.each {|(pron1, literal1)|
#         data2[1].each {|literal2|
#           literal = literal1 + literal2
#           query = PrimeQuery.new([literal], nil, :literal_exact)
#           words = search(query)
#           words.each {|word|
#             data2[0].each {|pron2|
#               if word.pron == (pron1 + pron2) then
#                 unless tmp_data.member?([word.pron, word.literal])
#                   tmp_data.push([word.pron, word.literal]) 
#                 end
#               end
#             }
#           }            
#         }
#       }
#       data = tmp_data
#     }

#     results = PrimeWordList.new()
#     data.each {|(pron1, literal1)|
#       last_data[1].each {|literal2|
#         literal = literal1 + literal2

#         query = PrimeQuery.new([literal], nil, :literal_exact)
#         words = search(query)
#         words.each {|word|
#           last_data[0].each {|pron2|
#             if word.pron == (pron1 + pron2) then
#               results << word
#             end
#           }
#         }
#       }
#     }
#     return results
#   end
end
