当前位置: 首页 > news >正文

Model-Free TD Control: Sarsa

import time
import random
# 相对于Q 效果会差一些
class Env:
    """Cliff-walking grid world.

    The agent position is ``[x, y]`` where ``x`` is the row index
    (``0 .. height - 1``) and ``y`` is the column index
    (``0 .. length - 1``).  Row 0 is the cliff line ``S x ... x T``:
    column 0 is the start, the last column is the terminal cell, and every
    column in between is the cliff.
    """

    # (row delta, column delta) for each action id:
    # 0: right, 1: left, 2: up (towards row 0), 3: down (away from row 0).
    _MOVES = [[0, 1], [0, -1], [-1, 0], [1, 0]]

    def __init__(self, length, height):
        """Create a ``height`` x ``length`` map with the agent at the start."""
        self.length = length
        self.height = height
        # The agent starts on the 'S' cell.
        self.x = 0
        self.y = 0

    def render(self, frames=50):
        """Draw the map on the terminal and sleep for ``1 / frames`` seconds.

        'S' is the start, 'T' the terminal cell, 'x' the cliff, '.' a free
        cell and 'o' the agent.
        """
        for i in range(self.height):
            if i == 0:
                # The cliff is drawn on row 0.
                line = ['S'] + ['x'] * (self.length - 2) + ['T']
            else:
                line = ['.'] * self.length
            if self.x == i:
                line[self.y] = 'o'
            print(''.join(line))
        # Move the cursor back up so the next frame overwrites this one.
        print('\033[' + str(self.height + 1) + 'A')
        time.sleep(1.0 / frames)

    def step(self, action):
        """Apply ``action`` and return ``(reward, [x, y], terminal)``.

        There are 4 legal actions: 0: right, 1: left, 2: up, 3: down.
        Moves that would leave the grid are clamped to the border.  Every
        step costs -1; stepping onto a cliff cell costs -100 instead.  The
        episode ends on any row-0 cell other than the start.
        """
        d_row, d_col = self._MOVES[action]
        self.x = min(self.height - 1, max(0, self.x + d_row))
        self.y = min(self.length - 1, max(0, self.y + d_col))
        states = [self.x, self.y]
        reward = -1  # reward for an ordinary step
        terminal = False
        # On the cliff line "SxxxxxT", anywhere except the start ends the episode.
        if self.x == 0 and self.y > 0:
            terminal = True
            if self.y != self.length - 1:
                reward = -100  # the agent fell off the cliff
        return reward, states, terminal

    def reset(self):
        """Put the agent back on the start cell."""
        self.x = 0
        self.y = 0


class Q_table:
    """Flat table of action values Q([x, y], a) with an epsilon-greedy policy."""

    def __init__(self, length, height, actions=4, alpha=0.1, gamma=0.9):
        """Initialise every Q(s, a) to zero.

        ``alpha`` is the step size and ``gamma`` the discount factor used by
        ``update``.
        """
        self.table = [0] * actions * length * height
        self.actions = actions
        self.length = length
        self.height = height
        self.alpha = alpha
        self.gamma = gamma

    def _index(self, a, x, y):
        """Return the index of Q([x, y], a) in ``self.table``."""
        return a * self.height * self.length + x * self.length + y

    def _epsilon(self):
        """Return the exploration rate (a constant that can be tuned).

        The original author notes that a decaying schedule such as
        ``20. / (num_episode + 100)`` (0.2 at the beginning, 0.05 after 300
        episodes, tending to 0) may give better convergence.
        """
        return 0.1

    def take_action(self, x, y, num_episode):
        """Pick an action for state ``[x, y]`` epsilon-greedily.

        ``num_episode`` is accepted for interface compatibility but is not
        used by the constant-epsilon policy.
        """
        if random.random() < self._epsilon():
            # Explore: uniform over all actions (was hard-coded to 4).
            return int(random.random() * self.actions)
        actions_value = [self.table[self._index(a, x, y)] for a in range(self.actions)]
        return actions_value.index(max(actions_value))

    def epsilon_q(self, x, y):
        """Return Q([x, y], a') for an a' drawn epsilon-greedily."""
        actions_value = [self.table[self._index(a, x, y)] for a in range(self.actions)]
        if random.random() > self._epsilon():
            return max(actions_value)
        return actions_value[int(random.random() * self.actions)]

    def update(self, a, s0, s1, r, is_terminated):
        """Move Q(s0, a) towards the TD target by a step of size ``alpha``.

        Both ``s0`` and ``s1`` have the form ``[x, y]``.  For a non-terminal
        transition the target is ``r + gamma * Q(s1, a')`` with ``a'``
        sampled by ``epsilon_q``; for a terminal transition it is ``r``.

        NOTE: ``a'`` is sampled here independently of the action that
        ``take_action`` later selects in ``s1``, so this is an on-policy
        sampled target rather than the textbook Sarsa target, which
        bootstraps from the action actually taken next.
        """
        q_predict = self.table[self._index(a, s0[0], s0[1])]
        if not is_terminated:
            q_target = r + self.gamma * self.epsilon_q(s1[0], s1[1])
        else:
            q_target = r
        self.table[self._index(a, s0[0], s0[1])] += self.alpha * (q_target - q_predict)


def cliff_walk(episodes=5000, report_every=1, render=False):
    """Train on a 12 x 4 cliff-walking map and print the score per episode.

    ``episodes`` is the number of training episodes.  A score line is
    printed whenever the episode number is a multiple of ``report_every``
    (0 disables reporting).  When ``render`` is true the map is drawn after
    every step.
    """
    env = Env(length=12, height=4)
    table = Q_table(length=12, height=4)
    for num_episode in range(episodes):
        # Every episode starts from the start cell.
        env.reset()
        episodic_reward = 0
        is_terminated = False
        s0 = [0, 0]
        while not is_terminated:
            action = table.take_action(s0[0], s0[1], num_episode)
            r, s1, is_terminated = env.step(action)
            table.update(action, s0, s1, r, is_terminated)
            episodic_reward += r
            if render:
                env.render(frames=100)
            s0 = s1
        if report_every and num_episode % report_every == 0:
            print("Episode: {}, Score: {}".format(num_episode, episodic_reward))


if __name__ == "__main__":
    cliff_walk()

 Episode: 0, Score: -100
Episode: 20, Score: -147
Episode: 40, Score: -48
Episode: 60, Score: -131
Episode: 80, Score: -54
Episode: 100, Score: -63
Episode: 120, Score: -39
Episode: 140, Score: -100
Episode: 160, Score: -38
Episode: 180, Score: -31
Episode: 200, Score: -28
Episode: 220, Score: -25
Episode: 240, Score: -17
Episode: 260, Score: -26
Episode: 280, Score: -103
Episode: 300, Score: -17
Episode: 320, Score: -100
Episode: 340, Score: -17
Episode: 360, Score: -21
Episode: 380, Score: -23
Episode: 400, Score: -19
Episode: 420, Score: -24
Episode: 440, Score: -23
Episode: 460, Score: -100
Episode: 480, Score: -16
Episode: 500, Score: -17
Episode: 520, Score: -28
Episode: 540, Score: -15
Episode: 560, Score: -15
Episode: 580, Score: -17
Episode: 600, Score: -100
Episode: 620, Score: -19
Episode: 640, Score: -19
Episode: 660, Score: -102
Episode: 680, Score: -17
Episode: 700, Score: -16
Episode: 720, Score: -17
Episode: 740, Score: -19
Episode: 760, Score: -115
Episode: 780, Score: -15
Episode: 800, Score: -17
Episode: 820, Score: -16
Episode: 840, Score: -15
Episode: 860, Score: -15
Episode: 880, Score: -17
Episode: 900, Score: -17
Episode: 920, Score: -19
Episode: 940, Score: -17
Episode: 960, Score: -18
Episode: 980, Score: -23
Episode: 1000, Score: -19
Episode: 1020, Score: -18
Episode: 1040, Score: -17
Episode: 1060, Score: -20
Episode: 1080, Score: -17
Episode: 1100, Score: -17
Episode: 1120, Score: -19
Episode: 1140, Score: -21
Episode: 1160, Score: -24
Episode: 1180, Score: -20
Episode: 1200, Score: -21
Episode: 1220, Score: -19
Episode: 1240, Score: -19
Episode: 1260, Score: -17
Episode: 1280, Score: -23
Episode: 1300, Score: -17
Episode: 1320, Score: -15
Episode: 1340, Score: -15
Episode: 1360, Score: -15
Episode: 1380, Score: -20
Episode: 1400, Score: -19
Episode: 1420, Score: -17
Episode: 1440, Score: -15
Episode: 1460, Score: -17
Episode: 1480, Score: -15
Episode: 1500, Score: -15
Episode: 1520, Score: -15
Episode: 1540, Score: -15
Episode: 1560, Score: -18
Episode: 1580, Score: -17
Episode: 1600, Score: -15
Episode: 1620, Score: -20
Episode: 1640, Score: -17
Episode: 1660, Score: -117
Episode: 1680, Score: -21
Episode: 1700, Score: -21
Episode: 1720, Score: -22
Episode: 1740, Score: -18
Episode: 1760, Score: -19
Episode: 1780, Score: -17
Episode: 1800, Score: -19
Episode: 1820, Score: -19
Episode: 1840, Score: -17
Episode: 1860, Score: -20
Episode: 1880, Score: -17
Episode: 1900, Score: -21
Episode: 1920, Score: -17
Episode: 1940, Score: -17
Episode: 1960, Score: -15
Episode: 1980, Score: -17
Episode: 2000, Score: -15
Episode: 2020, Score: -19
Episode: 2040, Score: -17
Episode: 2060, Score: -19
Episode: 2080, Score: -18
Episode: 2100, Score: -17
Episode: 2120, Score: -18
Episode: 2140, Score: -18
Episode: 2160, Score: -17
Episode: 2180, Score: -21
Episode: 2200, Score: -20
Episode: 2220, Score: -21
Episode: 2240, Score: -18
Episode: 2260, Score: -17
Episode: 2280, Score: -17
Episode: 2300, Score: -18
Episode: 2320, Score: -18
Episode: 2340, Score: -17
Episode: 2360, Score: -17
Episode: 2380, Score: -19
Episode: 2400, Score: -18
Episode: 2420, Score: -100
Episode: 2440, Score: -19
Episode: 2460, Score: -23
Episode: 2480, Score: -19
Episode: 2500, Score: -19
Episode: 2520, Score: -18
Episode: 2540, Score: -18
Episode: 2560, Score: -19
Episode: 2580, Score: -21
Episode: 2600, Score: -18
Episode: 2620, Score: -21
Episode: 2640, Score: -20
Episode: 2660, Score: -17
Episode: 2680, Score: -19
Episode: 2700, Score: -18
Episode: 2720, Score: -19
Episode: 2740, Score: -22
Episode: 2760, Score: -19
Episode: 2780, Score: -22
Episode: 2800, Score: -17
Episode: 2820, Score: -17
Episode: 2840, Score: -18
Episode: 2860, Score: -17
Episode: 2880, Score: -21
Episode: 2900, Score: -21
Episode: 2920, Score: -17
Episode: 2940, Score: -18
Episode: 2960, Score: -17
Episode: 2980, Score: -19
Episode: 3000, Score: -18
Episode: 3020, Score: -17
Episode: 3040, Score: -17
Episode: 3060, Score: -21
Episode: 3080, Score: -15
Episode: 3100, Score: -19
Episode: 3120, Score: -17
Episode: 3140, Score: -17
Episode: 3160, Score: -17
Episode: 3180, Score: -17
Episode: 3200, Score: -17
Episode: 3220, Score: -18
Episode: 3240, Score: -19
Episode: 3260, Score: -19
Episode: 3280, Score: -17
Episode: 3300, Score: -18
Episode: 3320, Score: -17
Episode: 3340, Score: -25
Episode: 3360, Score: -18
Episode: 3380, Score: -17
Episode: 3400, Score: -19
Episode: 3420, Score: -17
Episode: 3440, Score: -15
Episode: 3460, Score: -118
Episode: 3480, Score: -17
Episode: 3500, Score: -15
Episode: 3520, Score: -17
Episode: 3540, Score: -19
Episode: 3560, Score: -21
Episode: 3580, Score: -17
Episode: 3600, Score: -17
Episode: 3620, Score: -17
Episode: 3640, Score: -19
Episode: 3660, Score: -15
Episode: 3680, Score: -15
Episode: 3700, Score: -100
Episode: 3720, Score: -17
Episode: 3740, Score: -17
Episode: 3760, Score: -100
Episode: 3780, Score: -100
Episode: 3800, Score: -17
Episode: 3820, Score: -18
Episode: 3840, Score: -19
Episode: 3860, Score: -17
Episode: 3880, Score: -19
Episode: 3900, Score: -19
Episode: 3920, Score: -19
Episode: 3940, Score: -18
Episode: 3960, Score: -18
Episode: 3980, Score: -15
Episode: 4000, Score: -19
Episode: 4020, Score: -17
Episode: 4040, Score: -20
Episode: 4060, Score: -19
Episode: 4080, Score: -17
Episode: 4100, Score: -19
Episode: 4120, Score: -15
Episode: 4140, Score: -22
Episode: 4160, Score: -17
Episode: 4180, Score: -22
Episode: 4200, Score: -18
Episode: 4220, Score: -18
Episode: 4240, Score: -19
Episode: 4260, Score: -100
Episode: 4280, Score: -17
Episode: 4300, Score: -19
Episode: 4320, Score: -17
Episode: 4340, Score: -19
Episode: 4360, Score: -21
Episode: 4380, Score: -22
Episode: 4400, Score: -21
Episode: 4420, Score: -18
Episode: 4440, Score: -22
Episode: 4460, Score: -17
Episode: 4480, Score: -20
Episode: 4500, Score: -17
Episode: 4520, Score: -17
Episode: 4540, Score: -17
Episode: 4560, Score: -19
Episode: 4580, Score: -17
Episode: 4600, Score: -19
Episode: 4620, Score: -24
Episode: 4640, Score: -18
Episode: 4660, Score: -17
Episode: 4680, Score: -17
Episode: 4700, Score: -19
Episode: 4720, Score: -15
Episode: 4740, Score: -17
Episode: 4760, Score: -19
Episode: 4780, Score: -17
Episode: 4800, Score: -19
Episode: 4820, Score: -19
Episode: 4840, Score: -21
Episode: 4860, Score: -19
Episode: 4880, Score: -18
Episode: 4900, Score: -17
Episode: 4920, Score: -20
Episode: 4940, Score: -17
Episode: 4960, Score: -17
Episode: 4980, Score: -17

相关文章:

Model-Free TD Control: Sarsa

import time import random # 相对于Q 效果会差一些 class Env():def __init__(self, length, height):# define the height and length of the mapself.length lengthself.height height# define the agents start positionself.x 0self.y 0def render(self, frames50):fo…...

CloudBase CMS的开发注意事项

引言 在进行基于云开发的微信小程序开发时为了减轻工作量打算用CloudBase CMS来减轻工作量，随后去了解并体验了CloudBase CMS的使用，总体来说还有些许问题没有解决，对减轻后台管理工作并没有起到很大的作用。 项目情景 使用CloudBase CMS来管…...

大佬联合署名!反对 ACL 设置匿名期!

夕小瑶科技说 原创 作者 | 智商掉了一地、Python 近日，自然语言处理领域的多位知名学者联合发起了一项反对 ACL 设置匿名期的联合署名行动，包括著名学者 William Wang 和 Yoav Goldberg 在内，还有Christopher Potts、Hal Daume、Luke Zettl…...

【JavaSE】Java基础语法(十四):Static

文章目录 概述特点与应用注意事项为什么一个静态方法中只能访问用static修饰的成员? 概述 Java中的static是一个修饰符（也可称关键字），可以用于修饰变量、方法和代码块。 特点与应用 static修饰的成员具有以下特点： 被类的所有对…...

1.Linux初识

在 Linux 系统中，sudo 是一个重要的命令，可以允许普通用户以管理员权限来运行特定的命令。通过 sudo 命令，普通用户可以暂时获取管理员权限，执行需要管理员身份才能执行的操作。 下面是一些关于 sudo 命令的用法： 以管…...

进程(二)

这一节我们写个MFC剪切板程序 1.下载相应的组件 工具->工具视图&#xff0c;因为之前已经下载过一部分了&#xff0c;这里如果创建MFC报错的话&#xff0c;就要把没下载的补上 此项目需要MFC库 解决方法 2.创建MFC程序 3.打开资源视图&#xff0c;直接在菜单栏顶部搜索…...

《消息队列高手课》课程笔记(二)

消息模型：主题和队列有什么区别？ 两类消息模型 早期的消息队列，就是按照“队列”的数据结构来设计的。 生产者（Producer）发消息就是入队操作，消费者（Consumer）收消息就是出队也就是…...

以“智”提质丨信创呼叫

随着人工智能、大数据、云计算等新兴技术飞速发展&#xff0c;呼叫中心、全媒体智能客服等现已被广泛应用于多个行业领域。其中&#xff0c;呼叫中心作为政企对外服务的重要窗口&#xff0c;已从“传统电话营销”发展到“智能呼叫中心”阶段&#xff0c;以客户服务为核心&#…...

Pool与PG的说明以及Ceph的IO流程

Pool与PG的说明以及Ceph的IO流程 Pool与PG Ceph中的数据是以对象的形式存储在存储池(pool)中的。每个存储池都被划分为若干个存储组(PG)&#xff0c;每个存储组同时也是一个数据分片(shard)。存储组是Ceph用来实现数据的分布式存储和高可用的重要组成部分。每个存储组包含若干…...

20230529_Hadoop_集群操作命令

HDFS_集群操作命令&#xff1a; 一、集群启停命令 # 启动Hadoop的HDFS进程start-dfs.sh# 关闭Hadoop的HDFS进程stop-dfs.sh# 单独关闭某一个进程hadoop-daemon.sh start[/stop] namenode[/datanode/secondarynamenode]二、HDFS文件系统的基本信息 数据的路径表达方式&#xff…...

边缘计算AI硬件智能分析网关V1版的接入流程与使用步骤

我们的AI边缘计算网关硬件——智能分析网关目前有两个版本&#xff1a;V1版与V2版&#xff0c;两个版本都能实现对监控视频的智能识别和分析&#xff0c;支持抓拍、记录、告警等&#xff0c;在AI算法的种类上和视频接入上&#xff0c;两个版本存在些许的区别。V1的基础算法有人…...

【redis】Stream、String 超详细介绍

文章目录 一、Stream1.1 写入数据XADD条目 ID 的格式 1.2 获取数据XRANGE 和 XREVRANGEXREAD 监听新条目非阻塞形式阻塞形式 1.3 消费者组XGROUP 创建消费者组XREADGROUP 通过消费者组消费XACK 确认消息消费者组示例 1.4 XPENDING 和 XCLAIM 认领 其他消费者 的待处理消息XPEND…...

算法基础学习笔记——⑫最小生成树\二分图\质数\约数

✨博主&#xff1a;命运之光 ✨专栏&#xff1a;算法基础学习 目录 ✨最小生成树 &#x1f353;朴素Prim &#x1f353;Kruskal算法 ✨二分图 &#x1f353;匈牙利算法 ✨质数 &#x1f353;&#xff08;1&#xff09;质数的判定——试除法 &#x1f353;&#xff08;2&…...

了解信号的传输方式、编码与调制、信道的极限容量

1.了解信号的传输方式、编码与调制、信道的极限容量 笔记来源&#xff1a; 湖科大教书匠&#xff1a;传输方式 声明&#xff1a;该学习笔记来自湖科大教书匠&#xff0c;笔记仅做学习参考 1.1 了解信号的传输方式 串行传输与并行传输 同步传输与异步传输 为什么需要收发双发…...

SpringBoot自动配置原理总结

1、我们需要从主启动类的SpringBootApplication注解开始分析&#xff1a; SpringBootApplication是一个复合注解&#xff0c;进入以后看到主要包括以下三个注解&#xff1a; SpringBootConfiguration EnableAutoConfiguration ComponentScan(excludeFilters { Filter(type …...

【LeetCode: 410. 分割数组的最大值 | 暴力递归=>记忆化搜索=>动态规划 】

&#x1f680; 算法题 &#x1f680; &#x1f332; 算法刷题专栏 | 面试必备算法 | 面试高频算法 &#x1f340; &#x1f332; 越难的东西,越要努力坚持&#xff0c;因为它具有很高的价值&#xff0c;算法就是这样✨ &#x1f332; 作者简介&#xff1a;硕风和炜&#xff0c;…...

内核对象和两种同步

概念 Windows 中每个内核对象都只是一个内存块&#xff0c;它由操作系统内核分配&#xff0c;并只能由操作系统内核进 行访问 它的所有者&#xff1a;内核对象的所有者是操作系统内核&#xff0c;而非进程&#xff0c;也就是说当进程退出&#xff0c;内核对象不一定会销毁 法…...

水表远程监控系统有什么功能吗?

水表远程监控系统是通过远程传输水表数据，实现对水表的远程监控和管理的一种智能化系统。它主要具备以下功能： 1.远程抄表功能：通过远程传输技术，实现对水表的远程抄表和监控，无需人工上门抄表，节省人力成本…...

zabbix自定义监控

一、案例操作&#xff1a;自定义监控内容 案列&#xff1a;自定义监控客户端服务器登录的人数 需求&#xff1a;限制登录人数不超过 3 个&#xff0c;超过 3 个就发出报警信息 1、自定义监控内容的操作步骤 1.1 在客户端创建自定义 key 明确需要执行的 linux 命令 who | …...

【AUTOSAR】Com通讯栈配置说明(四)---- Nm模块

Nm模块 NmGlobalConfig NmGlobalConstants NmRxIndicationCallback: callback 函数 NmCycletimeMainFunction:Nm 主函数调用周期 NmDevErrorDetect: 是否支持DET NmVersionInfoApi: 是否支持获取版本信息api PduR模块 PduRBswModules PduRBswModuleRef&#xff1a;关联的BS…...

5种方法高效解决DWG文件格式兼容性问题:LibreDWG开源CAD库完整指南

5种方法高效解决DWG文件格式兼容性问题&#xff1a;LibreDWG开源CAD库完整指南 【免费下载链接】libredwg Official mirror of libredwg. With CI hooks and nightly releases. PRs ok 项目地址: https://gitcode.com/gh_mirrors/li/libredwg LibreDWG是一个免费开源的C…...

STM32以太网实战:手把手教你配置SMI接口,搞定PHY寄存器读写

STM32以太网实战&#xff1a;手把手教你配置SMI接口&#xff0c;搞定PHY寄存器读写 在嵌入式以太网开发中&#xff0c;PHY芯片的配置往往是项目成败的关键。很多开发者能够轻松完成MAC层的初始化&#xff0c;却在PHY寄存器读写这个环节卡壳——明明硬件连接正确&#xff0c;却无…...

深入Linux内核:图解PTP硬件时间戳(HW Timestamp)从网卡到用户空间的完整路径

深入Linux内核&#xff1a;图解PTP硬件时间戳从网卡到用户空间的完整路径 1. 高精度时间同步的技术演进与PTP核心价值 在分布式系统与工业自动化领域&#xff0c;微秒级甚至纳秒级的时间同步已成为刚需。传统NTP协议受限于软件实现和网络抖动&#xff0c;精度通常只能达到毫秒级…...

铁路局信息化综合管理平台总体设计方案

一、五层架构支撑全域智能化 平台以感知、网络、数据、平台、应用五层架构贯通铁路资源数字化链路&#xff0c;为铁路局打造横向到边、纵向到底的智能化管理底座。 应用层-业务功能模块–物资仓储、卧具跟踪、工具管理、档案管理等业务功能模块 平台层-微服务与技术中心–提…...

Android Native内存泄漏系统化分析与排查实战指南

引言 在Android开发中,内存管理是一个至关重要的环节,直接影响应用的性能、稳定性和用户体验。随着应用复杂度增加,内存泄漏问题日益突出,尤其是在Native层(如C/C++代码),其排查难度更大。Native内存泄漏可能导致应用崩溃、卡顿或系统资源耗尽,因此系统化分析和排查成…...

Android主流架构演进:从MVC到MVI,聚焦MVVM核心实践

引言 在Android应用开发中,架构设计是确保代码可维护性、可测试性和可扩展性的关键。随着技术演进,主流架构从传统的MVC(Model-View-Controller)逐步过渡到MVP(Model-View-Presenter)、MVVM(Model-View-ViewModel),再到新兴的MVI(Model-View-Intent)。这种演进反映…...

ReAct 循环的 50 行 Go 实现,逐行拆解

ReAct 循环的 50 行 Go 实现&#xff0c;逐行拆解 系列「企业级 AI Agent 实现拆解」第三篇。上一篇讲了 Session 聚合根和状态机——状态怎么迁移、事件怎么发、终态怎么判。但状态机本身是静态的&#xff0c;谁在驱动这些迁移&#xff1f; 答案是 RunTurnHandler.Handle()——…...

内连接,左连接,右连接怎么区别开来?

区分这三种连接其实非常简单，核心就在于看**“谁的数据必须全部保留，谁的数据没有匹配就要被过滤掉”**。 为了让你彻底搞懂，我们可以把 user 表（用户）和 orders 表（订单）想象成两个班级&#x…...

自动驾驶感知中的CFAR:毫米波雷达如何在海量杂波中揪出真实目标?

自动驾驶感知中的CFAR&#xff1a;毫米波雷达如何在海量杂波中揪出真实目标&#xff1f; 当一辆自动驾驶汽车行驶在繁华的城市街道时&#xff0c;它的毫米波雷达每秒会接收到成千上万个反射信号。这些信号中&#xff0c;只有极少数来自真正需要关注的行人、车辆等目标&#xff…...

企业微信桌面端深度集成:DLL注入与协议逆向实战

1. 这不是“黑产教程”&#xff0c;而是企业级办公系统集成的现实路径“微信逆向与DLL注入”这八个字&#xff0c;一出来就容易让人联想到灰色地带、安全攻防、甚至违规外挂。但今天我要说的&#xff0c;是另一条路——一条我带团队在三年内落地了7个大型政企客户微信生态集成项…...