Analyzing Cantopop with Python

Charles Lam

The Sixth Workshop on Innovations in Cantonese Linguistics (WICL-6)

Workshop 1 - Analyzing Cantonese Corpus Data

May 27, 2022

Link to this notebook: https://drive.google.com/file/d/19hy2uEW80EaasV6EjSgQDJ0py_J7GEuQ/view?usp=sharing

Make your own copy and have fun!

Goals of this Demo

  1. Python is easy to use (via Google Colaboratory)
  2. Lyrics Studies & Digital Humanities
  3. Extracting meaning from a corpus
  • Please feel free to ask questions at any time; it is probably easier than saving them until the end.

Analyzing Cantopop with Python

  • Why Google Colab?

    • https://colab.research.google.com/
    • Installation-free
    • Interface with Google Drive (e.g. CHAT files uploaded to shared folders)
    • PyCantonese!
  • Why Cantopop?

    • Identity and culture of Hong Kong
    • Data-driven / empirical studies
    • Lots of studies / explorations of English songs (see https://pudding.cool)

Three things you can do after this talk

  1. Build your own (small) corpus that can leverage PyCantonese
  2. Search with keywords
  3. Use other metrics: TF-IDF (Term Frequency - Inverse Document Frequency)

Task 1 - Building your own (small) corpus

Preprocessing

Suppose we want to build a small corpus of songs. If we just copy and paste raw texts found online, this is often what we find:

有隻雀仔跌落水 x3

有隻雀仔跌落水,被水沖去~

  • There is a lot of data wrangling and clean-up in the process.

Problems

  • The scope of "x3" is unclear (here it repeats just "跌落水" three times, not the entire first line)
  • Corpus tools generally do not understand such shorthand
  • Other punctuation marks might also affect our word counts

有隻雀仔跌落水 x3

有隻雀仔跌落水,被水沖去~

Solution: Manual clean-up

  • Remove punctuation and other characters (partly scriptable; see the sketch below)
  • Display the characters as they are sung

有隻雀仔跌落水 跌落水 跌落水

有隻雀仔跌落水 被水沖去
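
If you have many songs, part of this clean-up can be scripted, though a human still has to decide the scope of each "xN". A minimal sketch (the expand_repeat and clean_line helpers are ours, not part of PyCantonese):

In [ ]:
import re

def expand_repeat(line, phrase):
    # A human decides what "xN" repeats (here: 跌落水); the code only does
    # the mechanical expansion and strips the trailing "xN" marker
    match = re.search(r'\s*[xX](\d+)\s*$', line)
    if match:
        n = int(match.group(1))
        line = line[:match.start()].rstrip() + (' ' + phrase) * (n - 1)
    return line

def clean_line(line):
    # Replace punctuation and decorations with spaces, collapse whitespace
    return ' '.join(re.sub(r'[,,、。~!!??…]', ' ', line).split())

print(expand_repeat('有隻雀仔跌落水 x3', '跌落水'))  # 有隻雀仔跌落水 跌落水 跌落水
print(clean_line('有隻雀仔跌落水,被水沖去~'))        # 有隻雀仔跌落水 被水沖去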

Creating a machine-readable corpus from raw text

  • English marks word boundaries with spaces; Cantonese does not, so explicit word boundaries will be useful.

  • It would also be great to gather information about parts of speech.

  • Here we use the PyCantonese parse_text function.

In [1]:
# First, let's install the pycantonese package and import it 
# These steps are similar to downloading an app on your phone and then opening it

# For code blocks like this one, Ctrl + Enter is the shortcut to run it! 

!pip install pycantonese
import pycantonese
Requirement already satisfied: pycantonese in /home/charles/.local/lib/python3.8/site-packages (3.4.0)
Requirement already satisfied: pylangacq<0.17.0,>=0.16.0 in /home/charles/.local/lib/python3.8/site-packages (from pycantonese) (0.16.2)
Requirement already satisfied: wordseg==0.0.2 in /home/charles/.local/lib/python3.8/site-packages (from pycantonese) (0.0.2)
Requirement already satisfied: python-dateutil<=3.0.0,>=2.0.0 in /home/charles/.local/lib/python3.8/site-packages (from pylangacq<0.17.0,>=0.16.0->pycantonese) (2.8.2)
Requirement already satisfied: tabulate[widechars]<=0.9.0,>=0.8.9 in /home/charles/.local/lib/python3.8/site-packages (from pylangacq<0.17.0,>=0.16.0->pycantonese) (0.8.9)
Requirement already satisfied: requests<=3.0.0,>=2.18.0 in /usr/lib/python3/dist-packages (from pylangacq<0.17.0,>=0.16.0->pycantonese) (2.22.0)
Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil<=3.0.0,>=2.0.0->pylangacq<0.17.0,>=0.16.0->pycantonese) (1.14.0)
Requirement already satisfied: wcwidth; extra == "widechars" in /home/charles/.local/lib/python3.8/site-packages (from tabulate[widechars]<=0.9.0,>=0.8.9->pylangacq<0.17.0,>=0.16.0->pycantonese) (0.2.5)

Let's find some songs and use the parse_text function to prepare them for further processing.

In [2]:
londonbridge =  """有隻雀仔跌落水 跌落水 跌落水
        有隻雀仔跌落水 被水沖去"""

happybday = """祝你生日快樂
            祝你生日快樂
            祝你生日快樂
            祝你生日快樂
            """

happybday2 = """恭祝你福壽與天齊 慶賀你生辰快樂
             年年都有今日 歲歲都有今朝
             恭喜你 恭喜你"""

songs = [londonbridge,happybday,happybday2]
  • The lyrics above correctly represent how the songs are sung, but machines do not (yet) know what counts as a word (cf. English, where spaces mark word boundaries).

  • The lyrics also do not show the parts of speech.

  • In the following code, we create a corpus that contains all the parsed lines.

  • Then, we print out the parsed lines.

In [3]:
# We can simply do this: 

parsed_lines = pycantonese.parse_text(happybday2)

print(parsed_lines.head())
*X:    恭祝            你         福壽           與         天         齊        慶賀           你         生辰             快樂
%mor:  VERB|gung1zuk1  PRON|nei5  NOUN|fuk1sau6  NOUN|jyu5  NOUN|tin1  ADJ|cai4  VERB|hing3ho6  PRON|nei5  VERB|saang1san4  NOUN|faai3lok6

*X:    年年           都        有今日             歲          歲          都        有         今朝
%mor:  NOUN|nin4nin4  ADV|dou1  VERB|jau5gam1jat6  NOUN|seoi3  NOUN|seoi3  ADV|dou1  VERB|jau5  NOUN|gam1ziu1

*X:    恭喜            你         恭喜            你
%mor:  VERB|gung1hei2  PRON|nei5  VERB|gung1hei2  PRON|nei5

In [4]:
# Or loop through the list of songs and apply the same "parse_text()" function

for song in songs:
  print(pycantonese.parse_text(song).head())
*X:    有隻        雀仔            跌落           水          跌落           水          跌落           水
%mor:  X|jau5zek3  NOUN|zoek3zai2  VERB|dit3lok6  NOUN|seoi2  VERB|dit3lok6  NOUN|seoi2  VERB|dit3lok6  NOUN|seoi2

*X:    有隻        雀仔            跌落           水          被         水          沖          去
%mor:  X|jau5zek3  NOUN|zoek3zai2  VERB|dit3lok6  NOUN|seoi2  NOUN|pei5  NOUN|seoi2  VERB|cung1  VERB|heoi3

*X:    祝         你         生日快樂
%mor:  VERB|zuk1  PRON|nei5  VERB|saang1jat6faai3lok6

*X:    祝         你         生日快樂
%mor:  VERB|zuk1  PRON|nei5  VERB|saang1jat6faai3lok6

*X:    祝         你         生日快樂
%mor:  VERB|zuk1  PRON|nei5  VERB|saang1jat6faai3lok6

*X:    祝         你         生日快樂
%mor:  VERB|zuk1  PRON|nei5  VERB|saang1jat6faai3lok6

*X:    恭祝            你         福壽           與         天         齊        慶賀           你         生辰             快樂
%mor:  VERB|gung1zuk1  PRON|nei5  NOUN|fuk1sau6  NOUN|jyu5  NOUN|tin1  ADJ|cai4  VERB|hing3ho6  PRON|nei5  VERB|saang1san4  NOUN|faai3lok6

*X:    年年           都        有今日             歲          歲          都        有         今朝
%mor:  NOUN|nin4nin4  ADV|dou1  VERB|jau5gam1jat6  NOUN|seoi3  NOUN|seoi3  ADV|dou1  VERB|jau5  NOUN|gam1ziu1

*X:    恭喜            你         恭喜            你
%mor:  VERB|gung1hei2  PRON|nei5  VERB|gung1hei2  PRON|nei5

As you can see, the automatic segmentation works well overall, but it is not without limitations (e.g. 有今日 above is treated as a single word, and the passive marker 被 is tagged as a noun). Whether the segmentation needs manual clean-up depends largely on the research questions.

Converting to separate CHA files

In practice, it is easier to convert the texts and save them as separate files.

The code below can largely be adapted directly to suit your own needs.

In [ ]:
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
In [ ]:
folder = "/content/drive/My Drive/Lyrics/MyFavSongs/"

for song in songs:
    # Use the first 4 characters of the lyrics as a quick-and-dirty file name
    with open(folder + song[:4] + ".cha", "w") as f:
        # CHAT files open with the @UTF8 and @Begin headers
        f.write("@UTF8\n")
        f.write("@Begin\n")
        
        lines = pycantonese.parse_text(song)
        f.write(str(lines.head()))

        f.write("@End\n")  # the CHAT convention spells the footer "@End"

Storing the corpus as a zip file

  • After writing the texts into individual files, you can also compress them into a zip file in Google Drive (a sketch follows below).

  • PyCantonese can also read directly from zip files (more info in the CHILDES and TalkBank Data section).
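
For the compression step, Python's standard library can create the zip file directly. A minimal sketch, assuming the same Drive folder as above:

In [ ]:
import shutil

# Creates MyFavSongs.zip next to the folder, from everything inside it
shutil.make_archive("/content/drive/My Drive/Lyrics/MyFavSongs",
                    "zip", "/content/drive/My Drive/Lyrics/MyFavSongs/")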

Searching directly from zip files

In [5]:
url = "https://charles-lam.net/cantopop/MyFavSongs.zip"
corpus = pycantonese.read_chat(url)

corpus.n_files() # Return number of files  
# len(corpus.words()) # Return number of tokens (i.e. word count) 

# print(f'There are {corpus.n_files()} files in the corpus.')

# print(f'There are {len(corpus.words())} tokens in the corpus.')
Out[5]:
3

We can find the frequency of a keyword like 你 'you' by specifying the character.

In [6]:
you = corpus.search(character='你')

print(f'There are {len(you)} tokens of the keyword.')
There are 8 tokens of the keyword.

In PyCantonese, we can also search by POS (part of speech); for example, let's get all the verbs.

In [7]:
all_verbs = corpus.search(pos='VERB')

print(f'There are {len(all_verbs)} tokens tagged as VERB.')
There are 21 tokens tagged as VERB.
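
Beyond characters and POS tags, search() also accepts phonological criteria; the sketch below assumes the tone parameter of PyCantonese 3.x (see the PyCantonese documentation for the full list of parameters):

In [ ]:
# E.g. all tokens containing a tone-2 syllable in our toy corpus
tone2 = corpus.search(tone='2')
print(f'There are {len(tone2)} tokens with tone 2.')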

Web scraping

So far, we have assumed that you have the raw text. If you do not, you might need to "scrape" lyrics data from the internet.

Web scraping = extracting data from websites.

  • E.g., if you want to extract the songs of a particular performer or lyricist (often hundreds of songs), it is easier to "scrape" all the pages than to copy and paste manually.

  • The Beautiful Soup package is very commonly used (see the sketch below). [https://www.crummy.com/software/BeautifulSoup/bs4/doc/]

  • If you are collecting your own data (e.g. transcribing from recordings), you will not need this.
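
A minimal Beautiful Soup sketch; the URL and the div class here are hypothetical, since every lyrics site is structured differently, so inspect the page source, adjust the selector, and check the site's terms of use before scraping:

In [ ]:
import requests
from bs4 import BeautifulSoup

# Hypothetical lyrics page and CSS class; adjust to the site you are scraping
url = "https://example.com/lyrics/some-song"
html = requests.get(url).text
soup = BeautifulSoup(html, "html.parser")
lyrics = soup.find("div", class_="lyrics").get_text()
print(lyrics)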

Task 2 - Basic Statistics in a Corpus

Load the corpus

With the zip files, we can easily load our sub-corpora.

Here we use the songs of two popular bands from the 1980s and 1990s: Beyond and Tat Ming Pair 達明一派.

In [8]:
Beyond_corpus = "https://charles-lam.net/cantopop/Beyond.zip"
TatMingPair_corpus = "https://charles-lam.net/cantopop/TatMingPair.zip"

beyond = pycantonese.read_chat(Beyond_corpus)

tmp = pycantonese.read_chat(TatMingPair_corpus)

Basic Statistics

  • Number of songs
  • Song titles
  • Tokens
  • Word frequency
  • Types
In [9]:
# Number of Songs 
# Because we saved each song to its own CHA file, the number of files equals the number of songs

print(f'Beyond: {beyond.n_files()} songs')
print(f'Tat Ming Pair: {tmp.n_files()} songs')
Beyond: 109 songs
Tat Ming Pair: 60 songs
In [43]:
# Song titles

beyond.file_paths()
# tmp.file_paths()
Out[43]:
['Beyond/Amani_parsed.cha',
 'Beyond/Bye-Bye_parsed.cha',
 'Beyond/Once_again_parsed.cha',
 'Beyond/Water_boy_parsed.cha',
 'Beyond/不再猶豫_parsed.cha',
 'Beyond/不可一世_parsed.cha',
 'Beyond/亞拉伯跳舞女郎_parsed.cha',
 'Beyond/交織千個心_parsed.cha',
 'Beyond/係要聽ROCK_N_ROLL_parsed.cha',
 'Beyond/俾面派對_parsed.cha',
 'Beyond/光輝歲月_parsed.cha',
 'Beyond/全是愛_parsed.cha',
 'Beyond/兩顆心_parsed.cha',
 'Beyond/再見理想_parsed.cha',
 'Beyond/冷雨夜 _parsed.cha',
 'Beyond/勇闖新世界_parsed.cha',
 'Beyond/千金一刻_parsed.cha',
 'Beyond/午夜怨曲_parsed.cha',
 'Beyond/午夜流浪_parsed.cha',
 'Beyond/午夜迷牆_parsed.cha',
 'Beyond/原諒我今天_parsed.cha',
 'Beyond/厭倦寂寞_parsed.cha',
 'Beyond/又是黃昏_parsed.cha',
 'Beyond/可否衝破_parsed.cha',
 'Beyond/可知道_parsed.cha',
 'Beyond/命運是你家_parsed.cha',
 'Beyond/和平與愛_parsed.cha',
 'Beyond/喜歡你_parsed.cha',
 'Beyond/城市獵人_parsed.cha',
 'Beyond/堅持信念_parsed.cha',
 'Beyond/大地_parsed.cha',
 'Beyond/天真的創傷_parsed.cha',
 'Beyond/妄想_parsed.cha',
 'Beyond/孤單一吻_parsed.cha',
 'Beyond/完全地愛吧_parsed.cha',
 'Beyond/完全的擁有_parsed.cha',
 'Beyond/巨人_parsed.cha',
 'Beyond/心內心外_parsed.cha',
 'Beyond/快樂王國_parsed.cha',
 'Beyond/情人_parsed.cha',
 'Beyond/愛你一切_parsed.cha',
 'Beyond/懷念你_parsed.cha',
 'Beyond/我早應該習慣_parsed.cha',
 'Beyond/我是憤怒_parsed.cha',
 'Beyond/我有我風格_parsed.cha',
 'Beyond/戰勝心魔_parsed.cha',
 'Beyond/摩登時代_parsed.cha',
 'Beyond/撒旦的詛咒_parsed.cha',
 'Beyond/文武英傑宣言_parsed.cha',
 'Beyond/新天地_parsed.cha',
 'Beyond/早班火車_parsed.cha',
 'Beyond/明日世界_parsed.cha',
 'Beyond/昔日舞曲_parsed.cha',
 'Beyond/昨日的牽絆_parsed.cha',
 'Beyond/是錯也再不分_parsed.cha',
 'Beyond/曾是擁有_parsed.cha',
 'Beyond/最後的對話_parsed.cha',
 'Beyond/未曾後悔_parsed.cha',
 'Beyond/未知賽事的長跑_parsed.cha',
 'Beyond/東方寶藏_parsed.cha',
 'Beyond/歲月無聲_parsed.cha',
 'Beyond/每段路_parsed.cha',
 'Beyond/水晶球_parsed.cha',
 'Beyond/永遠等待_parsed.cha',
 'Beyond/沙丘魔女_parsed.cha',
 'Beyond/海闊天空_parsed.cha',
 'Beyond/溫暖的家鄉_parsed.cha',
 'Beyond/灰色的心_parsed.cha',
 'Beyond/灰色軌跡_parsed.cha',
 'Beyond/無名的歌_parsed.cha',
 'Beyond/無悔這一生_parsed.cha',
 'Beyond/無淚的遺憾_parsed.cha',
 'Beyond/無無謂_parsed.cha',
 'Beyond/無盡空虛_parsed.cha',
 'Beyond/無聲的告別_parsed.cha',
 'Beyond/無語問蒼天_parsed.cha',
 'Beyond/爆裂都市_parsed.cha',
 'Beyond/爸爸媽媽_parsed.cha',
 'Beyond/狂人山莊_parsed.cha',
 'Beyond/玻璃箱_parsed.cha',
 'Beyond/現代舞台_parsed.cha',
 'Beyond/相依的心_parsed.cha',
 'Beyond/真的愛你_parsed.cha',
 'Beyond/秘密警察_parsed.cha',
 'Beyond/繼續沉醉_parsed.cha',
 'Beyond/與妳共行_parsed.cha',
 'Beyond/舊日的足跡_parsed.cha',
 'Beyond/衝_parsed.cha',
 'Beyond/衝上雲霄_parsed.cha',
 'Beyond/衝開一切_parsed.cha',
 'Beyond/誰伴我闖蕩_parsed.cha',
 'Beyond/誰來主宰_parsed.cha',
 'Beyond/誰是勇敢_parsed.cha',
 'Beyond/赤紅熱血_parsed.cha',
 'Beyond/走不開的快樂_parsed.cha',
 'Beyond/農民_parsed.cha',
 'Beyond/迷離境界_parsed.cha',
 'Beyond/追憶_parsed.cha',
 'Beyond/送給不知怎去保護環境的人(包括我)_parsed.cha',
 'Beyond/逝去日子_parsed.cha',
 'Beyond/過去與今天_parsed.cha',
 'Beyond/遙望_parsed.cha',
 'Beyond/金屬狂人_parsed.cha',
 'Beyond/長城_parsed.cha',
 'Beyond/隨意飄蕩_parsed.cha',
 'Beyond/願我能_parsed.cha',
 'Beyond/飛越苦海_parsed.cha',
 'Beyond/高溫派對_parsed.cha',
 'Beyond/點解點解_parsed.cha']
In [12]:
# Tokens (i.e. Word Count) 

print(f'Beyond: {len(beyond.words())} tokens')
print(f'Tat Ming Pair: {len(tmp.words())} tokens')
Beyond: 20259 tokens
Tat Ming Pair: 11738 tokens
In [13]:
# Create the list of word frequencies 

word_freq_beyond = beyond.word_frequencies()  
word_freq_tmp = tmp.word_frequencies()  

# Display 10 most common words
print(word_freq_beyond.most_common(10))
print(word_freq_tmp.most_common(10))
[('我', 710), ('的', 689), ('你', 296), ('是', 285), ('在', 182), ('裡', 177), ('著', 171), ('與', 165), ('心', 149), ('妳', 142)]
[('我', 359), ('的', 284), ('你', 274), ('在', 187), ('誰', 141), ('是', 128), ('著', 125), ('這', 99), ('裡', 96), ('愛', 84)]
In [14]:
# Types (i.e. unique words)
# Since word_frequencies() returns each unique word with its frequency, the number of types is simply the length of this list

print(f'Beyond: {len(word_freq_beyond)} types')
print(f'Tat Ming Pair: {len(word_freq_tmp)} types')
Beyond: 3003 types
Tat Ming Pair: 2494 types
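
With both counts at hand, a quick type-token ratio (TTR) is one line per corpus. This is a small extension, not part of the original demo:

In [ ]:
# TTR = types / tokens, a rough lexical-diversity measure; note that TTR is
# sensitive to corpus size (Beyond's corpus is nearly twice as large), so
# compare with care
print(f'Beyond TTR: {len(word_freq_beyond) / len(beyond.words()):.3f}')
print(f'Tat Ming Pair TTR: {len(word_freq_tmp) / len(tmp.words()):.3f}')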

Finding pronouns

The use of 你 (2nd person masculine) vs 妳 (2nd person feminine)

In [15]:
# 你 vs 妳

masc_2sg_beyond = beyond.search(character='你')
masc_2sg_tmp = tmp.search(character='你')

fem_2sg_beyond = beyond.search(character='妳')
fem_2sg_tmp = tmp.search(character='妳')

print(len(masc_2sg_beyond))
print(len(masc_2sg_tmp))
print(len(fem_2sg_beyond))
print(len(fem_2sg_tmp))
296
274
142
1

Visualize!

In [16]:
# We import the pandas package to handle "dataframes"

import pandas as pd 
In [27]:
# So we want something like this:  
#                 你    妳
# Beyond         296  142
# Tat Ming Pair  274    1

pronouns = pd.DataFrame({'Group': ['Beyond', 'Beyond', 'Tat Ming Pair', 'Tat Ming Pair'],
                   '2SG Gender': ['Masc','Fem','Masc','Fem'],
                   'Frequency': [296,142,274,1]
                   })

# Of course, one should use the variable names (e.g. "len(masc_2sg_beyond)") instead of hard-coding the values (like 296)! 

pronouns
Out[27]:
Group 2SG Gender Frequency
0 Beyond Masc 296
1 Beyond Fem 142
2 Tat Ming Pair Masc 274
3 Tat Ming Pair Fem 1
In [50]:
# We import the seaborn package to visualize the data
import seaborn as sns
results = sns.catplot(data=pronouns, kind="bar",
          x="Group", y="Frequency", hue="2SG Gender",palette="deep", height=3)
  • While these groups did not typically write their own lyrics, it is customary for the lyricists to communicate with the performers about the content and image of the songs, which is the premise for studying Cantopop lyrics this way.

  • This is part of a paper (in prep) titled "Two tales of a city: a digital investigation of Tat Ming Pair and Beyond in pre-handover Hong Kong" (First author: Dr Catherine Wong @HSUHK)

Task 3 - Extracting Meaning in a Lyrics Corpus

Load the songs

Here we use a small data set compiled from Eason Chan's songs related to time and memory, 1996-2010.

The findings come from joint work with Dr Catherine Wong at HSUHK.

In [2]:
eason_corpus = "https://charles-lam.net/cantopop/eason_27.zip"

eason = pycantonese.read_chat(eason_corpus)

Let's quickly see the basic information about what we have in the corpus

In [3]:
print(f'{eason.n_files()} songs')   # Number of files

print(f'{len(eason.words())} tokens') # Tokens (i.e. Word Count) 

word_freq_eason = eason.word_frequencies() # Create the list of word frequencies 
print(f'{len(word_freq_eason)} types')
27 songs
6201 tokens
1960 types
In [32]:
eason.file_paths()
Out[32]:
['eason/1874.cha',
 'eason/2001太空漫遊.cha',
 'eason/24.cha',
 'eason/Shall We Talk.cha',
 'eason/一切還好.cha',
 'eason/下週同樣時間(再見).cha',
 'eason/不如不見.cha',
 'eason/任我行.cha',
 'eason/八里公路.cha',
 'eason/反高潮.cha',
 'eason/單車.cha',
 'eason/夕陽無限好.cha',
 'eason/太陽照常升起.cha',
 'eason/失憶蝴蝶.cha',
 'eason/怕死.cha',
 'eason/我的快樂時代.cha',
 'eason/明年今日.cha',
 'eason/時代曲.cha',
 'eason/時光倒流二十年.cha',
 'eason/最佳損友.cha',
 'eason/最後今晚.cha',
 'eason/沙龍.cha',
 'eason/結束開始.cha',
 'eason/苦瓜.cha',
 'eason/阿牛.cha',
 'eason/陀飛輪.cha',
 'eason/黃金時代.cha']
In [33]:
print(word_freq_eason.most_common(10)) # Display 10 most common words
[('你', 182), ('我', 155), ('的', 136), ('不', 102), ('了', 94), ('一', 89), ('到', 81), ('在', 54), ('過', 49), ('都', 49)]
In [34]:
# Alternatively, this: 

eason.info(True)  # Use "eason.info()" if you want fewer details
27 files
793 utterances
6201 words
       Utterance Count    Word Count  File Path
---  -----------------  ------------  ----------------------------
#1                  25           224  eason/1874.cha
#2                  38           228  eason/2001太空漫遊.cha
#3                  25           202  eason/24.cha
#4                  34           277  eason/Shall We Talk.cha
#5                  31           306  eason/一切還好.cha
#6                  17           207  eason/下週同樣時間(再見).cha
#7                  10            91  eason/不如不見.cha
#8                  31           367  eason/任我行.cha
#9                  18           213  eason/八里公路.cha
#10                 34           188  eason/反高潮.cha
#11                 25           208  eason/單車.cha
#12                 27           210  eason/夕陽無限好.cha
#13                 28           204  eason/太陽照常升起.cha
#14                 28           193  eason/失憶蝴蝶.cha
#15                 35           275  eason/怕死.cha
#16                 28           166  eason/我的快樂時代.cha
#17                 18           197  eason/明年今日.cha
#18                 31           247  eason/時代曲.cha
#19                 23           184  eason/時光倒流二十年.cha
#20                 47           332  eason/最佳損友.cha
#21                 27           196  eason/最後今晚.cha
#22                 31           198  eason/沙龍.cha
#23                 24           175  eason/結束開始.cha
#24                 35           348  eason/苦瓜.cha
#25                 36           240  eason/阿牛.cha
#26                 43           300  eason/陀飛輪.cha
#27                 44           225  eason/黃金時代.cha

TF-IDF (term frequency - inverse document frequency)

  • TF-IDF = Term Frequency - Inverse Document Frequency
  • It evaluates how relevant a word is to a document in a collection of documents, i.e. how unique a certain word is within the dataset

  • Suppose we have a corpus of 10 linguistics articles:

    • "the" is high in TF but also high in document frequency (many "the"s in many documents), so its TF-IDF score is low
    • If the word "syntax" is high in TF, but its tokens are all found in two documents (low document frequency), then we tend to think "syntax" is important for these two documents
    • If the word "syntax" is high in TF, but its tokens are spread evenly across all documents, its TF-IDF score is lower, and the keyword is therefore interpreted as less relevant / important
  • The higher a term's TF-IDF score, the more easily we can recognize the song from that term

  • The full math is slightly more involved (see Spärck Jones (1972), https://doi.org/10.1108/eb026526); the toy computation below illustrates the idea
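
As a toy illustration of the classic formulation, TF-IDF = tf * log(N / df). Note that scikit-learn's TfidfVectorizer, used below, applies a smoothed variant of the same idea plus normalization:

In [ ]:
import math

n = 10    # a corpus of 10 documents
tf = 5    # "syntax" occurs 5 times in the document at hand

# Case 1: "syntax" appears in only 2 of the 10 documents
idf = math.log(n / 2)
print(round(tf * idf, 3))  # 8.047: frequent here, rare elsewhere, so distinctive

# Case 2: "syntax" appears in all 10 documents
idf = math.log(n / 10)     # log(1) = 0
print(round(tf * idf, 3))  # 0.0: frequent everywhere, so not distinctive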

The implementation in Python is rather simple!

In [9]:
# import the necessary packages 

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
In [10]:
# scikit-learn's TfidfVectorizer requires one list, with each document as a single space-separated string 
word_list = []

for file in eason:
  lines = ' '.join(file.words())
  word_list.append(lines)

# len(word_list)

There are really just two variable names to change.

In [11]:
vectorizer = TfidfVectorizer()

vectors = vectorizer.fit_transform(word_list) # Or switch to whatever data set you have!
feature_names = vectorizer.get_feature_names_out()
dense = vectors.todense()
denselist = dense.tolist()

df = pd.DataFrame(denselist, columns=feature_names) # Here we name the output "df" (short for "dataframe")

For some terms (e.g. "shall", "we" and "talk"), it might not be hard to guess which song they come from, if you know the corpus well.

But overall, the results here are a little hard to read, so let's try some visualization.

In [12]:
df_transposed = df.transpose()
df_transposed.round(3)
Out[12]:
0 1 2 3 4 5 6 7 8 9 ... 17 18 19 20 21 22 23 24 25 26
and 0.0 0.000 0.0 0.063 0.0 0.000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000 0.0 0.0 0.0 0.000 0.0 0.0 0.000
shall 0.0 0.000 0.0 0.314 0.0 0.000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000 0.0 0.0 0.0 0.000 0.0 0.0 0.000
talk 0.0 0.000 0.0 0.314 0.0 0.000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000 0.0 0.0 0.0 0.000 0.0 0.0 0.000
we 0.0 0.000 0.0 0.314 0.0 0.000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000 0.0 0.0 0.0 0.000 0.0 0.0 0.000
一世 0.0 0.000 0.0 0.000 0.0 0.000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.063 0.0 0.0 0.0 0.000 0.0 0.0 0.000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
黃昏 0.0 0.000 0.0 0.000 0.0 0.000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000 0.0 0.0 0.0 0.000 0.0 0.0 0.000
黃葉 0.0 0.000 0.0 0.000 0.0 0.000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000 0.0 0.0 0.0 0.068 0.0 0.0 0.000
黃金 0.0 0.000 0.0 0.000 0.0 0.000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000 0.0 0.0 0.0 0.000 0.0 0.0 0.147
黑黑 0.0 0.059 0.0 0.000 0.0 0.000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000 0.0 0.0 0.0 0.000 0.0 0.0 0.000
齊集 0.0 0.000 0.0 0.000 0.0 0.193 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000 0.0 0.0 0.0 0.000 0.0 0.0 0.000

1475 rows × 27 columns

Find the keywords with the highest TF-IDF

From the large table above, we ask for each song's highest-scoring terms, then print out the song titles, the terms and their respective scores.

This gives us the most distinctive words in each song. For the purpose of this study, we tried to see whether these words would indicate the themes (spoiler alert: it works for some songs, but not all).

In [13]:
for row in df.index:
    print(f'The highest TF-IDF score in {row} is:', max(round(df.iloc[row],3)))
The highest TF-IDF score in 0 is: 0.373
The highest TF-IDF score in 1 is: 0.355
The highest TF-IDF score in 2 is: 0.554
The highest TF-IDF score in 3 is: 0.314
The highest TF-IDF score in 4 is: 0.498
The highest TF-IDF score in 5 is: 0.387
The highest TF-IDF score in 6 is: 0.179
The highest TF-IDF score in 7 is: 0.431
The highest TF-IDF score in 8 is: 0.291
The highest TF-IDF score in 9 is: 0.461
The highest TF-IDF score in 10 is: 0.356
The highest TF-IDF score in 11 is: 0.412
The highest TF-IDF score in 12 is: 0.216
The highest TF-IDF score in 13 is: 0.423
The highest TF-IDF score in 14 is: 0.307
The highest TF-IDF score in 15 is: 0.313
The highest TF-IDF score in 16 is: 0.314
The highest TF-IDF score in 17 is: 0.301
The highest TF-IDF score in 18 is: 0.377
The highest TF-IDF score in 19 is: 0.334
The highest TF-IDF score in 20 is: 0.461
The highest TF-IDF score in 21 is: 0.241
The highest TF-IDF score in 22 is: 0.268
The highest TF-IDF score in 23 is: 0.203
The highest TF-IDF score in 24 is: 0.49
The highest TF-IDF score in 25 is: 0.402
The highest TF-IDF score in 26 is: 0.44
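
The loop above shows only the score itself. To see which term carries that score, pandas' idxmax() returns the column label of the row maximum (a small extension, not part of the original demo):

In [ ]:
# For each song (row), print the term with the highest TF-IDF and its score
for row in df.index:
    print(f'{row}: {df.iloc[row].idxmax()} ({df.iloc[row].max():.3f})')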

Putting all the song titles back

In [16]:
song_list = []

for item in eason.file_paths():
  # Caution: str.strip() removes any of the listed *characters* from both
  # ends, not the substring as a whole; it happens to work for these titles
  song_title = item.strip("'eason/")
  song_title = song_title.strip(".cha'")
  song_list.append(song_title) 

# Map the row numbers 0, 1, 2, ... back to the song titles
df = df.rename(index=dict(zip([*range(0, len(eason.file_paths()), 1)],song_list)))

df.transpose()
Out[16]:
1874 2001太空漫遊 24 Shall We Talk 一切還好 下週同樣時間(再見) 不如不見 任我行 八里公路 反高潮 ... 時代曲 時光倒流二十年 最佳損友 最後今晚 沙龍 結束開始 苦瓜 阿牛 陀飛輪 黃金時代
and 0.0 0.000000 0.0 0.062785 0.0 0.000000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.000000
shall 0.0 0.000000 0.0 0.313924 0.0 0.000000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.000000
talk 0.0 0.000000 0.0 0.313924 0.0 0.000000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.000000
we 0.0 0.000000 0.0 0.313924 0.0 0.000000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.000000
一世 0.0 0.000000 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.062588 0.0 0.0 0.0 0.000000 0.0 0.0 0.000000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
黃昏 0.0 0.000000 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.000000
黃葉 0.0 0.000000 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000000 0.0 0.0 0.0 0.067767 0.0 0.0 0.000000
黃金 0.0 0.000000 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.146567
黑黑 0.0 0.059226 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.000000
齊集 0.0 0.000000 0.0 0.000000 0.0 0.193259 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.000000 0.0 0.0 0.0 0.000000 0.0 0.0 0.000000

1475 rows × 27 columns
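
Given the str.strip() caveat noted in the comment above, a more robust alternative is pathlib, which drops the directory and the extension in one step:

In [ ]:
from pathlib import Path

# Path(...).stem keeps just the file name without its extension
song_list = [Path(p).stem for p in eason.file_paths()]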

We can also list the top 3 keywords for each song.

The findings / discussion come from joint work with Catherine Wong at HSUHK. Link to presentation: "Poetics of time and identity in Hong Kong lyrics: a digital investigation"

In [17]:
import numpy as np

# np.argsort(-df.values, axis=1)[:, :3] gives, for each row, the column
# indices of the 3 highest scores; df.columns.values[...] maps them to terms
top3words = pd.DataFrame(df.columns.values[np.argsort(-df.values, axis=1)[:, :3]], 
                  index=df.index,
                  columns = ['1st','2nd','3rd']).reset_index()
In [18]:
top3words
Out[18]:
index 1st 2nd 3rd
0 1874 一八七四 出生 相識
1 2001太空漫遊 忽然 零零零一 太空
2 24 小時 廿四 愉快
3 Shall We Talk shall talk we
4 一切還好 其實 期望 得到
5 下週同樣時間(再見) 預約 仍然 齊集
6 不如不見 小店 渴望 早機
7 任我行 何時 頑童 那麼
8 八里公路 不錯 香港 東西
9 反高潮 高潮 寂寥 跌落
10 單車 難離難捨 荒野 茫茫
11 夕陽無限好 夕陽 黃昏 無限
12 太陽照常升起 人潮 星斗 流動
13 失憶蝴蝶 隨時 沒有 歡喜
14 怕死 不想 工作 享受
15 我的快樂時代 代價 毫無 幸福
16 明年今日 改變 今日 明年
17 時代曲 如何 奈何 剩下
18 時光倒流二十年 當時 童年 遺憾
19 最佳損友 朋友 有沒有 某某
20 最後今晚 今晚 最後 以後
21 沙龍 流露 感情 態度
22 結束開始 開始 結束 逐步
23 苦瓜 昇華 大悟大徹 討厭
24 阿牛 不甘心 開心 不能
25 陀飛輪 心跳 幾多 付出
26 黃金時代 回來 結伴 自從

Thank you!

多謝各位!

You can find this demo as slides at:

https://charles-lam.net/presentations/wicl6.html

(Reveal.js slides exported from an IPython notebook)