Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit709fbf6

Browse files
committed
spiral
1 parent385ff56 commit709fbf6

File tree

6 files changed

+110
-7
lines changed

6 files changed

+110
-7
lines changed

‎python_algorithms/algorithms/ml/unsupervised/reinforcement_learning/actor_critic_keras.py‎

Whitespace-only changes.
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
importnumpyasnp
2+
importtorchasT
3+
importtorch.nnasnn
4+
importtorch.nn.functionalasF
5+
importtorch.optimasoptim
6+
7+
8+
class ActorCriticNetwork(nn.Module):
    """Fully connected network with a shared trunk and two output heads.

    Two hidden ReLU layers feed a policy head (``pi``) that emits one raw,
    un-normalized score per action, and a value head (``v``) that emits a
    single number. The module also owns its Adam optimizer and places itself
    on ``cuda:0`` when CUDA is available, otherwise on the CPU.

    Args:
        lr: Learning rate handed to ``optim.Adam``.
        input_dims: Sequence that is unpacked into the first ``nn.Linear``
            ahead of ``fc1_dims`` (so in practice a one-element sequence
            such as ``[8]``).
        n_actions: Number of outputs of the policy head.
        fc1_dims: Width of the first hidden layer.
        fc2_dims: Width of the second hidden layer.
    """

    def __init__(self, lr, input_dims, n_actions, fc1_dims=256, fc2_dims=256):
        super().__init__()
        self.fc1 = nn.Linear(*input_dims, fc1_dims)
        self.fc2 = nn.Linear(fc1_dims, fc2_dims)
        self.pi = nn.Linear(fc2_dims, n_actions)
        self.v = nn.Linear(fc2_dims, 1)

        # Move the parameters to their final device *before* building the
        # optimizer, as recommended by the torch.optim documentation.
        self.device = T.device("cuda:0" if T.cuda.is_available() else "cpu")
        self.to(self.device)
        self.optimizer = optim.Adam(self.parameters(), lr=lr)

    def forward(self, state):
        """Run ``state`` through the trunk and both heads.

        Args:
            state: Input tensor accepted by the first linear layer.

        Returns:
            Tuple ``(pi, v)``: the raw policy-head output (no softmax is
            applied here) and the value-head output.
        """
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        pi = self.pi(x)
        v = self.v(x)

        return (pi, v)
26+
27+
28+
class Agent:
    """One-step actor-critic agent built on ``ActorCriticNetwork``.

    ``choose_action`` samples an action and remembers its log-probability;
    ``learn`` then performs a single optimizer step from one transition
    using that stored log-probability.

    Args:
        lr: Learning rate forwarded to the network's optimizer.
        input_dims: Input dimensions forwarded to the network.
        fc1_dims: Width of the first hidden layer.
        fc2_dims: Width of the second hidden layer.
        n_actions: Number of discrete actions.
        gamma: Discount factor applied to the next state's value.
    """

    def __init__(self, lr, input_dims, fc1_dims, fc2_dims, n_actions, gamma=0.99):
        self.gamma = gamma
        self.lr = lr
        self.fc1_dims = fc1_dims
        self.fc2_dims = fc2_dims
        self.actor_critic = ActorCriticNetwork(
            lr, input_dims, n_actions, fc1_dims, fc2_dims
        )
        # Log-probability of the most recently sampled action; set by
        # choose_action() and consumed by learn().
        self.log_prob = None

    def _as_batch(self, observation):
        """Return ``observation`` as a float tensor with a leading axis of 1.

        The observation is first packed into one contiguous numpy array so
        that an ndarray input does not go through PyTorch's slow
        "list of ndarrays" conversion path.
        """
        batch = np.array([observation], dtype=np.float32)
        return T.tensor(batch, dtype=T.float).to(self.actor_critic.device)

    def choose_action(self, observation):
        """Sample an action for ``observation`` and store its log-probability.

        Args:
            observation: A single observation (not batched).

        Returns:
            The sampled action as a plain Python number.
        """
        state = self._as_batch(observation)
        probabilities, _ = self.actor_critic.forward(state)
        # The network emits raw scores; normalize them over the action axis.
        probabilities = F.softmax(probabilities, dim=1)
        action_probs = T.distributions.Categorical(probabilities)
        action = action_probs.sample()
        self.log_prob = action_probs.log_prob(action)

        return action.item()

    def learn(self, state, reward, state_, done):
        """Take one gradient step from a single transition.

        Args:
            state: Observation the last action was chosen from.
            reward: Scalar reward received for that action.
            state_: Observation that followed.
            done: Truthy when ``state_`` is terminal; the next-state value
                is then multiplied by zero.

        Raises:
            RuntimeError: If no action has been sampled yet, so there is no
                stored log-probability to build the actor loss from.
        """
        if self.log_prob is None:
            raise RuntimeError(
                "learn() called before choose_action(): no log-probability stored"
            )

        self.actor_critic.optimizer.zero_grad()

        state = self._as_batch(state)
        state_ = self._as_batch(state_)
        reward = T.tensor(reward, dtype=T.float).to(self.actor_critic.device)

        _, critic_value = self.actor_critic.forward(state)
        _, critic_value_ = self.actor_critic.forward(state_)

        # Temporal-difference error; (1 - int(done)) removes the bootstrap
        # term on terminal transitions.
        delta = reward + self.gamma * critic_value_ * (1 - int(done)) - critic_value

        # NOTE(review): delta is not detached, so gradients flow through it
        # in both loss terms (and through critic_value_) - confirm intended.
        actor_loss = -self.log_prob * delta
        critic_loss = delta ** 2

        (actor_loss + critic_loss).backward()
        self.actor_critic.optimizer.step()

‎python_algorithms/algorithms/principles.md‎

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@
3232
1. how would you manually solve the simplest, solve larger version
3333
2. write out manual steps you took in step 1 above
3434
3. code above step by step
35-
- break into subproblems
36-
- use processed elements (seen, max_so_far, moving_avg, etc.)
37-
- use auxiliary data structure (map, tuple, set, stack, queue, heap, tree)
38-
- track multiple indices, vars, diffs, sums, counts, factors, multiples, remainders, divisors, stats, vectors
35+
- break into subproblems
36+
- use processed elements (seen, max_so_far, moving_avg, etc.)
37+
- use auxiliary data structure (map, tuple, set, stack, queue, heap, tree)
38+
- track multiple indices, vars, diffs, sums, counts, factors, multiples, remainders, divisors, stats, vectors
3939
3. Think with simpler examples -> try noticing a pattern
4040
- reduce number of elements
4141
4. Use Visualization
@@ -85,7 +85,7 @@
8585
- Activity Selection Problem
8686
- Huffman coding
8787
- Job Sequencing
88-
-Fractinoal Knapsack
88+
-Fractional Knapsack
8989
- Prim's minimum spanning tree
9090

9191
#Seeing the End Game -**Retrograde Analysis**
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
2+
'''
3+
Pascal's triangle
4+
5+
1
6+
1 1
7+
1 2 1
8+
1 3 3 1
9+
1 4 6 4 1
10+
1 5 10 10 5 1
11+
12+
'''
13+
14+
# A O(n^2) time and O(1) extra space method for Pascal's Triangle
15+
def printPascal(n):
    """Print the first ``n`` rows of Pascal's triangle, one row per line.

    Every entry is followed by a single space and each row ends with a
    newline. Nothing is printed when ``n`` is less than 1.

    Each entry is derived from the previous one in the same row, so no row
    is ever stored. The update uses integer floor division, which is exact
    here because the product is always a multiple of ``i``; going through
    float division would corrupt entries once they exceed 2**53.

    Args:
        n: Number of rows to print.
    """
    for line in range(1, n + 1):
        C = 1  # binomial C(line - 1, i - 1): the entry about to be printed
        for i in range(1, line + 1):
            # The first value in a line is always 1
            print(C, end=' ')
            C = C * (line - i) // i
        print("")


printPascal(5)
27+
28+
def pascal(n):
    """Print ``n`` rows of Pascal's triangle, each as a Python list.

    The next row is built by adding the current row padded with a zero on
    the right to the same row padded with a zero on the left.

    Args:
        n: Number of rows to print; values below 1 print nothing.

    Returns:
        True when ``n >= 1``, otherwise False.
    """
    row = [1]
    pad = [0]
    for _ in range(max(n, 0)):
        print(row)
        row = [left + right for left, right in zip(row + pad, pad + row)]
    return n >= 1


pascal(5)

‎python_algorithms/etc/misc/spiral.py‎

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@ def matrix_layer_in_clockwise(offset):
3333
print(matrix_in_spiral_order(square_matrix))
3434

3535

36-
# The above works in four almoste identical iterations, Now a solution with a single iteration:
37-
36+
# The above works in four almost identical iterations, Now a solution with a single iteration:
3837

3938
defmatrix_in_spiral_order_single_iter(square_matrix: [[int]])-> [int]:
4039
shift= ((0,1), (1,0), (0,-1), (-1,0))

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp