Contents
  1. 1. TensorFlow识别实现
    1. 1.1. record.py 生成数据集
    2. 1.2. model.py 生成模型
    3. 1.3. trains.py 训练数据
    4. 1.4. evaluate.py 测试数据
  2. 2. 批量修改图片大小
  3. 3. 增加样本容量脚本
  4. 4. 批量剪切背景

假期去银川参加了Google 高职组夏令营,见到了好多高职组省一国一的大佬们,夏令营主体是一个创客比赛,要求需要用到TensorFlow框架,简单学习了一下。

TensorFlow识别实现

首先收集数据集,按类别保存在不同的文件夹中,然后运行 resizes.py 批量修改图片大小;如有需要,可以运行 write_pic.py 批量增加样本容量,或运行 shear.py 剪切背景。之后运行 record.py 生成训练集,model.py 定义模型,运行 trains.py 训练数据,最后运行 evaluate.py,放入测试集,进行图像的识别。

record.py 生成数据集

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import tensorflow as tf

from PIL import Image
# Root folder that holds one sub-folder of images per class (Windows path).
cwd = 'E:\\tftest03\\'
# Sub-folder names; a name's position in this list is used as its label.
classes = ['01', '02']

# NOTE(review): this code runs at import time too, so any module that does
# `import record` re-creates train.tfrecords before using read_and_decode().
# The context manager closes the record file even when an image fails to load.
with tf.python_io.TFRecordWriter("train.tfrecords") as writer:
    for index, name in enumerate(classes):
        class_path = cwd + name + '\\'
        for img_name in os.listdir(class_path):
            img_path = class_path + img_name  # full path of one image
            # Force 3-channel RGB so every record holds exactly 300*300*3
            # bytes; grayscale / RGBA / palette files would otherwise yield
            # a different byte count and break a fixed-shape reshape later.
            img = Image.open(img_path).convert('RGB')
            img = img.resize((300, 300))
            img_raw = img.tobytes()  # raw pixel bytes, no image header
            # Wrap label and pixel bytes in one Example protobuf.
            example = tf.train.Example(features=tf.train.Features(feature={
                "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[index])),
                'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
            }))
            writer.write(example.SerializeToString())  # serialise to bytes


def read_and_decode(filename, batch_size, image_shape=(300, 300, 3)):
    """Build queue-based input ops that read batches from a TFRecord file.

    Args:
        filename: path of the TFRecord file to read.
        batch_size: number of examples per returned batch.
        image_shape: (height, width, channels) of every stored image. It
            must match what the writer stored; the writer in this file
            resizes images to 300x300, so the default is (300, 300, 3).
            The previous hard-coded [500, 500, 3] could not hold the
            300*300*3 bytes per record and failed in the reshape.

    Returns:
        A pair (image_batch, label_batch): the standardised image batch and
        a float32 label tensor reshaped to [batch_size].
    """
    # Queue of input file names consumed by the reader.
    filename_queue = tf.train.string_input_producer([filename])

    reader = tf.TFRecordReader()
    # read() returns (key, value); only the serialised Example is needed.
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'img_raw': tf.FixedLenFeature([], tf.string),
        })

    img = tf.decode_raw(features['img_raw'], tf.uint8)
    img = tf.reshape(img, list(image_shape))
    label = tf.cast(features['label'], tf.float32)
    img = tf.image.per_image_standardization(img)
    image_batch, label_batch = tf.train.batch([img, label],
                                              batch_size=batch_size,
                                              num_threads=64,
                                              capacity=2000)
    return image_batch, tf.reshape(label_batch, [batch_size])

model.py 生成模型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import tensorflow as tf

def inference(images, batch_size, n_classes):
    """Build the forward pass of the CNN and return the class logits.

    Layer order: conv1 -> max-pool + LRN -> conv2 -> LRN + max-pool ->
    two 500-unit fully connected layers -> linear output layer.

    Args:
        images: 4-D float32 image batch; the first convolution kernel is
            [3, 3, 3, 16], so the last dimension must be 3.
        batch_size: leading dimension used when flattening the feature
            maps before the first fully connected layer.
        n_classes: number of output classes.

    Returns:
        The 'softmax_linear' tensor of unnormalised scores, produced by a
        [500, n_classes] matmul plus bias (no softmax is applied here).
    """
    # conv1: kernel shape is [height, width, in_channels, out_channels].
    with tf.variable_scope('conv1') as scope:
        kernel = tf.get_variable(
            'weights',
            shape=[3, 3, 3, 16],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        bias = tf.get_variable(
            'biases',
            shape=[16],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(images, kernel, strides=[1, 1, 1, 1], padding='SAME')
        conv1 = tf.nn.relu(tf.nn.bias_add(conv, bias), name=scope.name)

    # pool1 followed by local response normalisation.
    with tf.variable_scope('pooling1_lrn'):
        pool1 = tf.nn.max_pool(conv1,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME',
                               name='pooling1')
        norm1 = tf.nn.lrn(pool1,
                          depth_radius=4,
                          bias=1.0,
                          alpha=0.001 / 9.0,
                          beta=0.75,
                          name='norm1')

    # conv2: 16 -> 16 feature maps.
    with tf.variable_scope('conv2'):
        kernel = tf.get_variable(
            'weights',
            shape=[3, 3, 16, 16],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.1, dtype=tf.float32))
        bias = tf.get_variable(
            'biases',
            shape=[16],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        conv = tf.nn.conv2d(norm1, kernel, strides=[1, 1, 1, 1], padding='SAME')
        conv2 = tf.nn.relu(tf.nn.bias_add(conv, bias), name='conv2')

    # Normalisation first, then a stride-1 pool (spatial size is unchanged).
    with tf.variable_scope('pooling2_lrn'):
        norm2 = tf.nn.lrn(conv2,
                          depth_radius=4,
                          bias=1.0,
                          alpha=0.001 / 9.0,
                          beta=0.75,
                          name='norm2')
        pool2 = tf.nn.max_pool(norm2,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 1, 1, 1],
                               padding='SAME',
                               name='pooling2')

    # local3: flatten every example and apply the first dense layer.
    with tf.variable_scope('local3') as scope:
        flat = tf.reshape(pool2, shape=[batch_size, -1])
        flat_dim = flat.get_shape()[1].value
        dense_w = tf.get_variable(
            'weights',
            shape=[flat_dim, 500],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        dense_b = tf.get_variable(
            'biases',
            shape=[500],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(flat, dense_w) + dense_b, name=scope.name)

    # local4: second dense layer.
    with tf.variable_scope('local4'):
        dense_w = tf.get_variable(
            'weights',
            shape=[500, 500],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        dense_b = tf.get_variable(
            'biases',
            shape=[500],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, dense_w) + dense_b, name='local4')

    # Linear output layer. The weight variable keeps its original name
    # 'softmax_linear' so that existing checkpoints remain loadable.
    with tf.variable_scope('softmax_linear'):
        out_w = tf.get_variable(
            'softmax_linear',
            shape=[500, n_classes],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(stddev=0.005, dtype=tf.float32))
        out_b = tf.get_variable(
            'biases',
            shape=[n_classes],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.1))
        softmax_linear = tf.add(tf.matmul(local4, out_w), out_b, name='softmax_linear')

    return softmax_linear


def losses(logits, labels):
    """Return the mean sparse softmax cross-entropy of a batch.

    Args:
        logits: logits tensor passed to the cross-entropy op.
        labels: integer class-index tensor passed to the cross-entropy op.

    Returns:
        The scalar 'loss' tensor (mean of the per-example cross-entropy).
        A scalar summary tagged '<scope>/loss' is registered as a side effect.
    """
    with tf.variable_scope('loss') as scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=labels, name='xentropy_per_example')
        # Average the per-example values into a single scalar.
        loss = tf.reduce_mean(per_example, name='loss')
        summary_tag = scope.name + '/loss'
        tf.summary.scalar(summary_tag, loss)
    return loss


def trainning(loss, learning_rate):
    """Create the op that performs one Adam optimisation step.

    Args:
        loss: loss tensor to minimise.
        learning_rate: learning rate handed to the Adam optimizer.

    Returns:
        The training op; each run of it in a session applies one update
        and increments the non-trainable 'global_step' variable.
    """
    with tf.name_scope('optimizer'):
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        step_counter = tf.Variable(0, name='global_step', trainable=False)
        return adam.minimize(loss, global_step=step_counter)

def evaluation(logits, labels):
    """Return the fraction of examples whose top-1 prediction is correct.

    Args:
        logits: logits tensor passed to tf.nn.in_top_k as predictions.
        labels: integer label tensor passed to tf.nn.in_top_k as targets.

    Returns:
        A scalar float16 tensor: the mean of the per-example top-1 hits.
        A scalar summary tagged '<scope>/accuracy' is registered as a side
        effect.
    """
    with tf.variable_scope('accuracy') as scope:
        hits = tf.nn.in_top_k(logits, labels, 1)
        # Booleans become 0/1 so that their mean is the hit rate.
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float16))
        summary_tag = scope.name + '/accuracy'
        tf.summary.scalar(summary_tag, accuracy)
    return accuracy

trains.py 训练数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import numpy as np
import tensorflow as tf
import model
import record as cr

N_CLASSES = 2  # number of label classes: two (person / no person)
IMG_W = 300
IMG_H = 300  # image size
BATCH_SIZE = 5  # batch size
CAPACITY = 2000  # dataset capacity
MAX_STEP = 300  # number of training steps
learning_rate = 0.0001


# %%
def run_training1():
    """Train the model on ./train.tfrecords and write checkpoints.

    Builds the input pipeline, model, loss, optimiser and accuracy ops,
    then runs MAX_STEP training steps. Every 50 steps the loss, accuracy
    and merged summaries are reported; at step 0, every 2000 steps and on
    the last step a numbered checkpoint is written to ./recordstrain/ and
    an un-numbered one to ./model/model.ckpt.
    """
    logs_train_dir = './recordstrain/'
    tfrecords_file = './train.tfrecords'
    export_dir = './model'

    train_batch, train_label_batch = cr.read_and_decode(tfrecords_file, batch_size=BATCH_SIZE)
    train_batch = tf.cast(train_batch, dtype=tf.float32)
    train_label_batch = tf.cast(train_label_batch, dtype=tf.int64)
    train_logits = model.inference(train_batch, BATCH_SIZE, N_CLASSES)
    train_loss = model.losses(train_logits, train_label_batch)
    train_op = model.trainning(train_loss, learning_rate)
    train__acc = model.evaluation(train_logits, train_label_batch)

    summary_op = tf.summary.merge_all()
    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    # Both savers are created once, up front. Building a new Saver inside
    # the loop (as before) adds another set of save/restore ops to the
    # graph every time a checkpoint is written.
    saver = tf.train.Saver()
    export_saver = tf.train.Saver()

    # Saver.save() does not create a missing parent directory.
    os.makedirs(export_dir, exist_ok=True)

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            if step % 50 == 0:
                # Fetch the summary in the same run as the training step:
                # a separate sess.run(summary_op) would dequeue and consume
                # an extra batch that is never trained on.
                _, tra_loss, tra_acc, summary_str = sess.run(
                    [train_op, train_loss, train__acc, summary_op])
                # Accuracy is a fraction in [0, 1]; scale by 100 for percent
                # (the previous factor of 300 printed values up to 300%).
                print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
                train_writer.add_summary(summary_str, step)
            else:
                _, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc])
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                export_saver.save(sess, "./model/model.ckpt")
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()

    coord.join(threads)
    train_writer.close()  # flush pending summary events to disk
    sess.close()


run_training1()

evaluate.py 测试数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import model
import numpy as np
from PIL import Image
import tensorflow as tf
import os.path
import matplotlib.pyplot as plt
#import input_data

# Directory and file name that are joined into the output path of the
# frozen graph.
MODEL_DIR = "model/pb"
MODEL_NAME = "model.pb"

def get_one_image(train):
    """Load one randomly chosen image as a 300x300 RGB array.

    Args:
        train: non-empty sequence of image file paths.

    Returns:
        A numpy array holding the image, converted to RGB and resized to
        300x300.
    """
    n = len(train)
    ind = np.random.randint(0, n)
    img_dir = train[ind]

    image = Image.open(img_dir)
    plt.imshow(image)
    # Force three channels: grayscale / RGBA / palette files would
    # otherwise yield an array that cannot be reshaped to
    # [1, 300, 300, 3] for the network.
    image = image.convert('RGB').resize([300, 300])
    return np.array(image)


def get_one_img(img_dir):
    """Load the image at *img_dir* as a 300x300 RGB array.

    Args:
        img_dir: path of the image file to open.

    Returns:
        A numpy array holding the image, converted to RGB and resized to
        300x300.
    """
    image = Image.open(img_dir)
    plt.imshow(image)
    # Force three channels: grayscale / RGBA / palette files would
    # otherwise yield an array that cannot be reshaped to
    # [1, 300, 300, 3] for the network.
    image = image.convert('RGB').resize([300, 300])
    return np.array(image)


def evaluate_one_image():
    """Classify ./testpic/13.jpg and export the model as a frozen graph.

    Restores the latest checkpoint from ./recordstrain/, prints the softmax
    output for the test image, writes the graph (variables converted to
    constants, output node 'output') to MODEL_DIR/MODEL_NAME and prints the
    winning class. Returns early when no checkpoint is available.
    """
    output_graph = os.path.join(MODEL_DIR, MODEL_NAME)
    image_array = get_one_img('./testpic/13.jpg')

    with tf.Graph().as_default():
        BATCH_SIZE = 1
        N_CLASSES = 2

        # The placeholder is the real graph input, named 'input'. Before,
        # the image was embedded as a constant and the placeholder was fed
        # but unused, so the frozen graph could only ever score this image.
        x = tf.placeholder(tf.float32, shape=[300, 300, 3], name='input')
        image = tf.image.per_image_standardization(x)
        image = tf.reshape(image, [1, 300, 300, 3])
        logit = model.inference(image, BATCH_SIZE, N_CLASSES)

        logit = tf.nn.softmax(logit, name='output')

        logs_train_dir = './recordstrain/'

        saver = tf.train.Saver()

        with tf.Session() as sess:
            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                # Without restored weights the variables are uninitialised,
                # so running the graph would only raise; stop here.
                print('No checkpoint file found')
                return

            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Loading success, global_step is %s' % global_step)

            prediction = sess.run(logit, feed_dict={x: image_array})
            print(prediction)
            frozen_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, sess.graph_def, ['output'])
            print(output_graph)

            output_graph = output_graph.replace('\\', '/')
            # Create the target directory so that the write cannot fail on
            # a fresh checkout.
            os.makedirs(os.path.dirname(output_graph), exist_ok=True)
            with tf.gfile.FastGFile(output_graph, mode='wb') as f:
                f.write(frozen_graph_def.SerializeToString())

            # Classes are reported 1-based. A single format string replaces
            # the per-class if-chain, whose third branch was unreachable
            # with N_CLASSES = 2.
            max_index = int(np.argmax(prediction))
            print('result is %d with possibility %.6f' % (max_index + 1, prediction[0, max_index]))


evaluate_one_image()

批量修改图片大小

resizes.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# coding=utf-8
from PIL import Image
import os #通过os.listdir()函数列出指定目录下的所有文件
class_path = ["./01/"]  # directory paths of the images to process

def resize(img_path, size=(500, 500)):
    """Resize the image at *img_path* and overwrite the file with the result.

    Args:
        img_path: path of the image file; it is read and then replaced.
        size: target (width, height) in pixels. Defaults to (500, 500),
            the size that was previously hard-coded.
    """
    # Close the source file before saving over the same path.
    with Image.open(img_path) as img:
        resized = img.resize(size)
    resized.save(img_path)


if __name__ == "__main__":
    # Process every configured directory, not only the first entry.
    for dir_path in class_path:
        for img_name in os.listdir(dir_path):
            img_path = os.path.join(dir_path, img_name)  # directory + file name
            print(img_path)
            resize(img_path)

增加样本容量脚本

write_pic.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from PIL import Image
import random
import time
import os

count = 0  # module-level counter kept from the original; image_repeat does not use it


def image_repeat(image_path, image_name):
    """Save a noisy copy of an image next to the original.

    Between 0 and 99 randomly chosen pixels are overwritten with random
    RGB colours, and the result is saved in the same directory under a
    name prefixed with the current Unix timestamp.

    Args:
        image_path: directory prefix, including the trailing separator.
        image_name: file name of the source image inside *image_path*.
    """
    image_path_name = image_path + image_name  # full path of the source
    img = Image.open(image_path_name)
    # Draw coordinates from the real image size. The previous fixed
    # 300x500 range raised IndexError on smaller images and never touched
    # the rest of larger ones.
    width, height = img.size
    for _ in range(random.randrange(100)):
        x = random.randrange(width)
        y = random.randrange(height)
        R = random.randrange(256)  # red channel
        G = random.randrange(256)  # green channel
        B = random.randrange(256)  # blue channel
        print("[+]: "+image_name, x, y, R, G, B)
        img.putpixel((x, y), (R, G, B))

    # The timestamp has one-second resolution, so several calls for the
    # same image within a second used to overwrite each other. Append a
    # serial number until the name is unused.
    stamp = str(int(time.time()))
    images_name = image_path + stamp + "_" + image_name
    serial = 0
    while os.path.exists(images_name):
        serial += 1
        images_name = image_path + stamp + "-" + str(serial) + "_" + image_name
    print(images_name)
    img.save(images_name)
    print("生成")

if __name__ == "__main__":
    i = 0  # number of image_repeat calls made
    class_path = ["./02/"]
    for dir_path in class_path:  # every configured directory
        # listdir() is evaluated once per directory, so the copies created
        # below are not themselves picked up as sources.
        for img_name in os.listdir(dir_path):
            for sec in range(10):  # ten copies per source image
                i += 1
                # Use the directory being iterated; class_path[0] sent every
                # image to the first directory regardless of its origin.
                image_repeat(dir_path, img_name)
    print("共生成", i)

批量剪切背景

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# coding=utf-8
from PIL import Image
import os

def crop(dir):
    """Cut the top-left 100x100 region out of one downloaded image.

    Args:
        dir: file name of the image inside the baidu_pic source folder;
            the cropped result is saved under the same name in the
            'new no man' folder.
    """
    source_file = "E:\\tftest03\\baidu_pic\\baidu_pic\\" + dir
    target_file = "E:\\tftest03\\new no man\\" + dir
    # (left, upper, right, lower) corners of the region to keep.
    box = (0, 0, 100, 100)
    Image.open(source_file).crop(box).save(target_file)

def resize(dir):
    """Scale a previously cropped image to 250x250.

    Args:
        dir: file name of the image inside the 'new no man' folder; the
            resized result is saved under the same name in ./new_no_man/.
    """
    # NOTE(review): input is read from 'E:\tftest03\new no man\' but output
    # goes to the relative './new_no_man/' -- confirm both are intended.
    cropped = Image.open("E:\\tftest03\\new no man\\" + dir)
    cropped.resize((250, 250)).save("./new_no_man/" + dir)

# Folder whose entries are cropped and then resized, one after another.
path = "E:\\tftest03\\baidu_pic\\baidu_pic\\"

for dir_path in os.listdir(path):
    print(dir_path)
    # Crop first: resize() reads the file that crop() has just written.
    crop(dir_path)
    resize(dir_path)
Contents
  1. 1. TensorFlow识别实现
    1. 1.1. record.py 生成数据集
    2. 1.2. model.py 生成模型
    3. 1.3. trains.py 训练数据
    4. 1.4. evaluate.py 测试数据
  2. 2. 批量修改图片大小
  3. 3. 增加样本容量脚本
  4. 4. 批量剪切背景