8000 Policy iteration by StoneT2000 · Pull Request #8 · StoneT2000/rl-ts · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Policy iteration #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ docs
# test artifacts
coverage
.nyc_output
coverage.lcov

# for macs
.DS_Store
Expand Down
39 changes: 7 additions & 32 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,18 @@
"build": "npm run build:es5 && npm run build:es6",
"lint": "eslint . --ext .ts",
"lint:fix": "eslint . --fix --ext .ts",
"prettier": "prettier --write ."
"report": "nyc report",
"codecov": "nyc report --reporter=text-lcov > coverage.lcov",
"prettier": "prettier --write .",
"docs": "typedoc src/index.ts"
},
"keywords": [],
"author": "",
"license": "ISC",
"devDependencies": {
"@types/chai": "^4.2.15",
"@types/mocha": "^8.2.2",
"@types/node": "^14.14.37",
"@types/seedrandom": "^3.0.0",
"@typescript-eslint/eslint-plugin": "^4.20.0",
"chai": "^4.3.0",
Expand All @@ -33,8 +37,6 @@
"lib/"
],
"dependencies": {
"@types/node": "^14.14.37",
"@types/numjs": "^0.14.5",
"seedrandom": "^3.0.5"
}
}
8 changes: 8 additions & 0 deletions src/RL/Agent/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,12 @@ export abstract class Agent<State, Action> {
* @param observation
*/
abstract action(observation: State): Action;

/**
 * Seeds the agent's random number generator.
 *
 * The default implementation does nothing and returns immediately. Override
 * this method in agents that rely on randomness so that users can seed them.
 * @param seed - the seed value supplied by the caller (unused by this default implementation)
 */
public seed(seed: number): void {
return;
}
}
2 changes: 2 additions & 0 deletions src/RL/DP/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { IterativePolicyEvaluation } from './iterativePolicyEvaluation';
import { PolicyIteration } from './policyIteration';

/**
 * Groups the dynamic programming algorithms of this directory under a single
 * class, exposing each one as a static member (e.g. `DP.PolicyIteration`).
 */
export class DP {
static IterativePolicyEvaluation = IterativePolicyEvaluation;
static PolicyIteration = PolicyIteration;
}
65 changes: 37 additions & 28 deletions src/RL/DP/iterativePolicyEvaluation.ts
Original file line number Diff line number Diff line change
@@ -1,43 +1,46 @@
import { Agent } from '../Agent';
import { Environment } from '../Environments';
import { Dynamics, Environment } from '../Environments';
import { Space } from '../Spaces';

//TODO: Handle stochastic environments

export class IterativePolicyEvaluation<
ActionSpace extends Space<Action>,
ObservationSpace extends Space<State>,
Action,
State
> {
public env: Environment<ActionSpace, ObservationSpace, Action, State, number>;
public valueFunction: Map<any, number> = new Map();
public valueActionFunction: Map<any, { value: number; action: Action }> = new Map();
public dynamics: (sucessorState: State, reward: number, state: State, action: Action) => number;
public dynamics: null | Dynamics<State, Action>;
constructor(
env: Environment<ActionSpace, ObservationSpace, Action, State, number>,
/** Function to map environment to a hashable state representation */
public envToStateRep: (envToConvert: any) => any,
/** Function to map state representation to a usable environment of the same class as this evaluator was constructed with */
public envFromStateRep: (stateString: any) => typeof env,
/** A list of all possible state representations */
public allStateReps: any[],
/** The policy function to evaluate */
public policy: (action: Action, observation: State) => number,
/** A list of all possible valid actions */
public allPossibleActions: Action[],
/** The dynamics of the environment. Does not need to be given if the environment has predefined dynamics */
dynamics?: (sucessorState: State, reward: number, state: State, action: Action) => number,
public configs: {
/** Function to map an observation (state) to a hashable state representation */
obsToStateRep: (state: State) => any;
/** Function to map state representation to a usable environment of the same class as this evaluator was constructed with */
envFromStateRep: (stateString: any) => Environment<ActionSpace, ObservationSpace, Action, State, number>;
/** A list of all possible state representations */
allStateReps: any[];
/** The policy function to evaluate */
policy: (action: Action, observation: State) => number;
/** A list of all possible valid actions */
allPossibleActions: Action[];
/** The dynamics of the environment. Does not need to be given if the environment has predefined dynamics */
dynamics?: (sucessorState: State, reward: number, state: State, action: Action) => number;
}
) {
this.env = env;
allStateReps.forEach((s) => {
this.configs.allStateReps.forEach((s) => {
this.valueFunction.set(s, 0);
});
if (!dynamics) {
this.dynamics = this.env.dynamics;
if (!this.configs.dynamics) {
this.dynamics = null;
} else {
this.dynamics = dynamics;
this.dynamics = this.configs.dynamics;
}
}
setPolicy(policy: (action: Action, observation: State) => number) {
this.configs.policy = policy;
}
/**
* Estimates the value function of the given policy
* @param params - the parameters object
Expand All @@ -63,24 +66,30 @@ export class IterativePolicyEvaluation<
console.log(`Step ${step}`);
}
let delta = 0;
for (let stateString of this.allStateReps) {
for (let stateString of this.configs.allStateReps) {
let val = 0;
let s = this.envFromStateRep(stateString);
let s = this.configs.envFromStateRep(stateString);
let v_pi_s = 0;
for (let action of this.allPossibleActions) {
for (let action of this.configs.allPossibleActions) {
let observation = s.reset();
let stepOut = s.step(action);
let p_srsa = this.policy(action, observation);
let p_srsa = this.configs.policy(action, observation);
let reward = stepOut.reward;
let done = stepOut.done;

let sp_stateString = this.envToStateRep(s);
let sp_stateString = this.configs.obsToStateRep(stepOut.observation);

let v_pi_sp = this.valueFunction.get(sp_stateString)!;

// bind dynamics function to the current used environment
this.dynamics = this.dynamics.bind(s);
let p_sp_s_r = this.dynamics(stepOut.observation, reward, observation, action);

let p_sp_s_r = 0;
if (this.dynamics) {
p_sp_s_r = this.dynamics(stepOut.observation, reward, observation, action);
} else {
p_sp_s_r = s.dynamics(stepOut.observation, reward, observation, action);
}

v_pi_s += p_srsa * p_sp_s_r * (reward + 1 * v_pi_sp);
}

Expand Down
Loading
0