python - 马尔可夫分析,格式化
问题描述
我有一个程序,它从文本文件中读取大量文本,然后根据文本内容随机化内容以显示为短篇小说。该程序有效,但最后一部分,我显示的材料非常笨重且效率不高,我想知道是否有人对我如何更有效地接收文本然后将其作为字符串显示给用户有任何想法但是允许它跨越多行(本质上是换行),这样它就不仅仅是一个巨大的文本字符串,一直延伸到控制台的右侧。
from __future__ import print_function, division
import sys
import random
# global variables
suffix_map = {} # map from prefixes to a list of suffixes
prefix = () # current tuple of words
big_list = []
def process_file(filename, order=2):
"""Reads a file and performs Markov analysis.
filename: string
order: integer number of words in the prefix
returns: map from prefix to list of possible suffixes.
"""
fp = open(filename)
for line in fp:
for word in line.rstrip().split():
process_word(word, order)
def process_word(word, order=3):
"""Processes each word.
word: string
order: integer
During the first few iterations, all we do is store up the words;
after that we start adding entries to the dictionary.
"""
global prefix
if len(prefix) < order:
prefix += (word,)
return
try:
suffix_map[prefix].append(word)
except KeyError:
# if there is no entry for this prefix, make one
suffix_map[prefix] = [word]
prefix = shift(prefix, word)
def random_text(n=300):
"""Generates random wordsfrom the analyzed text.
Starts with a random prefix from the dictionary.
n: number of words to generate
"""
global big_list
# choose a random prefix (not weighted by frequency)
start = random.choice(list(suffix_map.keys()))
for i in range(n):
suffixes = suffix_map.get(start, None)
if suffixes == None:
random_text(n-i)
return
# choose a random suffix
word = random.choice(suffixes)
big_list.append(word + " ")
start = shift(start, word)
def shift(t, word):
"""Forms a new tuple by removing the head and adding word to the tail.
t : tuple of strings
word: string
Returns: tuple of strings
"""
return t[1:] + (word,)
def list_to_str_format():
global big_list
whole = " ".join(str(i) for i in big_list)
# 25 words per line
l1 = big_list[:25]
l2 = big_list[26:50]
l3 = big_list[51:75]
l4 = big_list[76:100]
l5 = big_list[101:125]
l6 = big_list[126:150]
l7 = big_list[151:175]
l8 = big_list[176:200]
l9 = big_list[201:225]
l10 = big_list[226:250]
l11 = big_list[256:275]
l12 = big_list[276:300]
str_1 = " ".join(str(i) for i in l1).capitalize()
str_2 = " ".join(str(i) for i in l2)
str_3 = " ".join(str(i) for i in l3)
str_4 = " ".join(str(i) for i in l4)
str_5 = " ".join(str(i) for i in l5)
str_6 = " ".join(str(i) for i in l6)
str_7 = " ".join(str(i) for i in l7)
str_8 = " ".join(str(i) for i in l8)
str_9 = " ".join(str(i) for i in l9)
str_10 = " ".join(str(i) for i in l10)
str_11 = " ".join(str(i) for i in l11)
str_12 = " ".join(str(i) for i in l12)
print(str_1)
print(str_2)
print(str_3)
print(str_4)
print(str_5)
print(str_6)
print(str_7)
print(str_8)
print(str_9)
print(str_10)
print(str_11)
print(str_12)
def main(filename, n=300, order=3):
try:
n = int(n)
order = int(order)
except ValueError as e:
print('Usage: %d filename [# of words] [prefix length]' % e)
else:
process_file(filename, order)
random_text(n)
list_to_str_format()
print()
main('C:\\Users\\Desktop\\TheBrothersKaramazov.txt')
解决方案
我允许自己改变你的加入模式,这形成了一个双倍的空间。你必须导入模块re
def list_to_str_format(line_length=80):
global big_list
whole = "".join(str(i) for i in big_list)
regex = re.compile('(.*?(\s))*')
while whole != "":
break_pos = regex.match(whole[:line_length]).end()
print(whole[:break_pos])
whole = whole[break_pos:]
推荐阅读
- node.js - “mustacheExpress().cache = null;”是什么意思?
- discord.py - 如何在一个参数中放入多个单词 discord.py
- php - 在laravel中创建新用户帐户后无法发送邮件
- javascript - 无法删除垂直元素之间的空间
- android - 片段中 TextSwitcher 的动态 TextSize
- angular - Angular 意外令牌
- jenkins-pipeline - 从 Jenkins 调用 Ansible 时出现问题
- html - 如何在汉堡菜单中相互对齐文本?
- r - 在 R 中转换每月日期
- angular - Firebase 函数错误:DeprecationWarning:grpc.load - 当网站运行时