First, locate the data.

The code is largely adapted from here

path = Path(r'/home/hasan/Schreibtisch/projects/data/microscopy')
path.ls()
(#9) [Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_mask_train_coco_format.json'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_mask_val_coco_format.json'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images.cache'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/train_images'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/data.yaml'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/train_msks'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train')]
#trn_msk_path = Path(r'/home/hasan/workspace/data/microscopy_data/patch_images')
#trn_img_path = Path(r'/home/hasan/workspace/data/microscopy_data/patch_masks/')
#trn_output_path = Path(r'/home/hasan/workspace/data/microscopy_data/yolo_dataset_train')

# Training split: patch-level masks and the matching patch-level images.
trn_msk_path, trn_img_path = (
    Path(r'/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks'),
    Path(r'/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images'),
)

# Validation split: same layout as the training split.
val_msk_path, val_img_path = (
    Path(r'/home/hasan/Schreibtisch/projects/data/microscopy/patch_val_masks'),
    Path(r'/home/hasan/Schreibtisch/projects/data/microscopy/patch_val_images'),
)

# Output roots for the converted YOLO datasets.
trn_output_path, val_output_path = (
    Path(r'/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train'),
    Path(r'/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_test'),
)

# Only the training output directory is created here.
trn_output_path.mkdir(parents=True, exist_ok=True)

#val_msk_path = Path(r'/home/hasan/workspace/data/microscopy_data/test_patch_images/')
#val_img_path = Path(r'/home/hasan/workspace/data/microscopy_data/test_patch_masks/')
#val_output_path = Path(r'/home/hasan/workspace/data/microscopy_data/yolo_dataset_test')
#Path(val_output_path).mkdir(parents=True, exist_ok=True)

#trn_json_path = Path(r'/home/hasan/workspace/data/microscopy_data/patch_mask_train_coco_format.json')
#val_json_path = Path(r'/home/hasan/workspace/data/microscopy_data/patch_mask_val_coco_format.json')

trn_json_path = Path(r'/home/hasan/Schreibtisch/projects/data/microscopy/patch_mask_train_coco_format.json')
val_json_path = Path(r'/home/hasan/Schreibtisch/projects/data/microscopy/patch_mask_val_coco_format.json')
trn_msk_path.ls(), trn_img_path.ls()
((#1642) [Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks/img_162_p_9.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks/img_11_p_9.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks/img_70_p_5.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks/img_67_p_1.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks/img_74_p_9.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks/img_16_p_5.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks/img_23_p_1.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks/img_112_p_6.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks/img_121_p_11.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_masks/img_152_p_1.png')...],
 (#1642) [Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images/img_162_p_9.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images/img_11_p_9.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images/img_70_p_5.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images/img_67_p_1.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images/img_74_p_9.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images/img_16_p_5.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images/img_23_p_1.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images/img_112_p_6.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images/img_121_p_11.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images/img_152_p_1.png')...])

We need to convert the data into a format that YOLOv8 can work with.

### 1. First, convert to COCO format


source

get_contours

 get_contours (img:numpy.ndarray)

Get contours from masks


source

from_contr_to_annotation

 from_contr_to_annotation (sn_cntr:list, consider_min_area:bool=True,
                           min_area:int=0)

Create annotation dict from a single contour

Type Default Details
sn_cntr list single contour
consider_min_area bool True whether to use min_area parameter
min_area int 0
Returns typing.Tuple

source

get_mask_info

 get_mask_info (msk_path, min_area=0)
image_infos, annotations, annotation_id=get_mask_info(trn_msk_path, min_area=0)
category_ids = {
    "object": 1,
}

source

process_masks

 process_masks (mask_path:Union[str,pathlib.Path],
                json_path:Union[str,pathlib.Path], category_ids:Dict)

Create the COCO-format JSON file for the training set

process_masks(
    mask_path=trn_msk_path, 
    json_path=trn_json_path, 
    category_ids=category_ids)

Create the COCO-format JSON file for the validation set

process_masks(
    mask_path=val_msk_path, 
    json_path=val_json_path, 
    category_ids=category_ids)

### 2. Now convert to YOLO format


source

read_json

 read_json (file_path)
trn_images = trn_img_path.ls()
json_data = read_json(trn_json_path)
json_data.keys()
dict_keys(['annotations', 'categories', 'images', 'info', 'licenses'])

source

get_file_info

 get_file_info (json_data:dict, file_name:str)

source

get_annotations

 get_annotations (json_data:dict, file_name:str)

source

normalized_polygon

 normalized_polygon (polygon:List, width:int, height:int)

Normalize polygon coordinates based on image height and width

trn_image_names = get_name(trn_img_path.ls())

source

create_yolo_dataset

 create_yolo_dataset (img_path:Union[str,pathlib.Path],
                      output_path:Union[str,pathlib.Path],
                      json_path:Union[str,pathlib.Path])

Create a YOLO dataset from COCO format

create_yolo_dataset(
    img_path=trn_img_path,
    output_path=trn_output_path,
    json_path=trn_json_path,
)
# Collect the category names stored in the COCO json; `nc` is the number of
# classes. (A stray bare `j` expression that preceded these lines was removed:
# it raises NameError unless a `j` is defined elsewhere.)
names = [cat['name'] for cat in json_data['categories']]
nc = len(names)
nc
1

source

create_yaml

 create_yaml (json_path:Union[str,pathlib.Path],
              yaml_path:Union[str,pathlib.Path],
              train_path:Union[str,pathlib.Path],
              val_path:Union[str,pathlib.Path],
              test_path:Union[str,pathlib.Path,NoneType]=None)

Create a YAML file with the training and validation image paths

Type Default Details
json_path typing.Union[str, pathlib.Path] json path with its name
yaml_path typing.Union[str, pathlib.Path] output path with yaml name
train_path typing.Union[str, pathlib.Path] train images path
val_path typing.Union[str, pathlib.Path] validation images path
test_path typing.Union[str, pathlib.Path, NoneType] None
Returns None
yolo_trn_images = f'/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train/images'
Path(yolo_trn_images).ls()
(#1642) [Path('/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train/images/img_162_p_9.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train/images/img_11_p_9.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train/images/img_70_p_5.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train/images/img_67_p_1.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train/images/img_74_p_9.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train/images/img_16_p_5.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train/images/img_23_p_1.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train/images/img_112_p_6.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train/images/img_121_p_11.png'),Path('/home/hasan/Schreibtisch/projects/data/microscopy/yolo_dataset_train/images/img_152_p_1.png')...]
# Write data.yaml next to the dataset, using the categories from the training
# COCO json and the converted YOLO training images.
# NOTE(review): val_path is set to trn_img_path (the training patches), so the
# validation entry in the yaml points at training data — confirm this is an
# intentional placeholder until a validation set is available.
create_yaml(
    json_path=trn_json_path, 
    yaml_path=f'{path}/data.yaml', 
    train_path=yolo_trn_images, 
    val_path=trn_img_path,
    )
trn_json_path = str(trn_json_path)
json_data = read_json(trn_json_path)
trn_img_path
Path('/home/hasan/Schreibtisch/projects/data/microscopy/patch_train_images')
names = [i['name'] for i in json_data['categories']]
nc = len(names)
names, nc
(['object'], 1)